Example #1
def load_seismic_volume(filename, key, dist, use_hdf5):
    ''' Load the seismic volume, from HDF5 or .dnpy files. '''
    # Create context.
    context = Context()
    if use_hdf5:
        print('Loading from .hdf5 file...')
        da = load_hdf5_distarray(context, filename, key, dist)
    else:
        print('Loading from .dnpy files...')
        da = load_dnpy_distarray(context, filename)
    # Optionally dump information about the array (disabled by default).
    if False:
        dump_distarray_info(da)
    return da
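A minimal usage sketch for the helper above; the file name, HDF5 key, and distribution string are hypothetical placeholders, and load_hdf5_distarray / load_dnpy_distarray are assumed to be defined alongside it:

# Hypothetical call: load a block-distributed volume from an HDF5 file.
volume = load_seismic_volume('seismic.hdf5', key='seismic', dist='bnn',
                             use_hdf5=True)
print(volume.shape)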
Example #2
def _transport_pre_plugin_list_run(self):
    self.n_processes = \
        self.exp.meta_data.plugin_list._get_n_processing_plugins()
    self.context = Context(targets=self.targets)
    closing(self.context).__enter__()
Example #3
def _transport_initialise(self, options):
    # self.exp is not available here
    MPI_setup(options)  # change this?
    with closing(Context()) as context:
        self.targets = context.targets  # set mpi logging here?
Example #4
class DistArrayTransport(TransportControl):

    def __init__(self):
        self.targets = None
        self.context = None
        self.n_processes = None
        self.count = None
        self.history = []

    def _transport_initialise(self, options):
        # self.exp is not available here
        MPI_setup(options)  # change this?
        with closing(Context()) as context:
            self.targets = context.targets  # set mpi logging here?

    def _transport_pre_plugin_list_run(self):
        self.n_processes = \
            self.exp.meta_data.plugin_list._get_n_processing_plugins()
        self.context = Context(targets=self.targets)
        closing(self.context).__enter__()

    def _transport_pre_plugin(self):
        # store all datasets and associated patterns
        self.__update_history(self.exp.index)
        self.__distribute_arrays(self.exp.index)

    def _transport_post_plugin(self):
        # If you wish to output datasets that have been removed from the
        # index, do that here (data.remove is True).
        pass

    def _transport_post_plugin_list_run(self):
        # convert distarrays to hdf5
        for data in self.exp.index['in_data'].values():
            name = data.get_name()
            fname = self.exp.meta_data.get('filename')[name]
            gname = self.exp.meta_data.get('group_name')[name]
            data.data.context.save_hdf5(fname, data.data, gname, mode='w')
            self.exp._get_experiment_collection()['saver_plugin']\
                ._open_read_only(data, fname, gname)
        closing(self.context).__exit__()

    def __update_history(self, data_index):
        for dtype, data_dict in data_index.items():
            for name, dobj in data_dict.items():
                pattern = dobj._get_plugin_data().get_pattern()
                self.history.append({name: pattern})

    def __distribute_arrays(self, data_index):
        if not self.history:
            # First pass: read the data directly from file into a distributed
            # array.  Expand this later for other input types (or should the
            # first dataset always be treated as an HDF5 dataset?).
            self.__load_data_from_hdf5(data_index['in_data'])
        else:
            self.__redistribute_data(data_index['in_data'])
        self.__create_out_data(data_index['out_data'])

    def __redistribute_data(self, data_list):
        """ Calculate the pattern distributions and redistribute the data if
        they differ.
        """
        for data in data_list.values():
            patterns = self.__get_distribution_history(data.get_name())
            if patterns[0] != patterns[1]:
                # *** Temporarily creates an ndarray; better to create an
                # empty distarray and populate it?
                temp = data.data.toarray()
                distribution = \
                    Distribution(self.context, data.get_shape(), patterns[-1])  # currently redundant
                data.data = self.context.fromarray(temp, patterns[-1])

    def __load_data_from_hdf5(self, data_list):
        ''' Create a distarray from the specified section of the HDF5 file. '''

        for data in data_list.values():
            input_file = data.backing_file.filename
            dist = self.__calculate_distribution(
                data._get_plugin_data().get_pattern())
            distribution = \
                Distribution(self.context, data.get_shape(), dist=dist)
            data.data = self.context.load_hdf5(
                input_file, distribution=distribution, key=data.name)

    def __create_out_data(self, out_data):
        for data in out_data.values():
            dist = self.__calculate_distribution(
                data._get_plugin_data().get_pattern())
            dist = Distribution(self.context, data.get_shape(), dist)
            data.data = self.context.zeros(dist, dtype=np.int32)

    def __get_distribution_history(self, name):
        hist = [i for i in range(len(self.history)) if
                list(self.history[i].keys())[0] == name][-2:]
        return [self.__calculate_distribution(
            list(self.history[p].values())[0]) for p in hist]

    def __calculate_distribution(self, pattern):
        pattern_entry = list(pattern.values())[0]
        core_dirs = pattern_entry['core_dir']
        slice_dirs = pattern_entry['slice_dir']
        nDims = len(core_dirs + slice_dirs)
        dist = ['n'] * nDims
        for sl in slice_dirs:
            dist[sl] = 'b'
        return ''.join(dist)

    def _transport_process(self, plugin):
        # self.distributed_process(self.process, plugin)
        print(self.testing)
        pickler.dump(self)
        self.distributed_process()

    def distributed_process(self, kernel):
        self.context.register(kernel)
        iters_key = \
            self.context.apply(self.local_process, (), {'kernel': kernel})
        return iters_key

    @staticmethod
    def local_process(frames, output, params, kernel):
        # Runs on the engines: apply the kernel locally, wrap the result in a
        # LocalArray, and return a proxy to the client.
        from distarray.localapi import LocalArray
        recon = kernel(frames, output, params)
        res = LocalArray(output.distribution, buf=recon)
        return proxyize(res)  # noqa

    def testing(self):
        print("running the testing function")
Example #5
def cli(cmd):
    """
    Process command line arguments, set default parameters, and call
    do_julia_runs.

    Parameters
    ----------
    cmd : list of str
        sys.argv
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('resolution_list',
                        metavar='N',
                        type=int,
                        nargs='+',
                        help="resolutions of the Julia set to benchmark (NxN)")
    parser.add_argument("-r",
                        "--repeat",
                        type=int,
                        dest='repeat_count',
                        default=3,
                        help=("number of repetitions of each unique parameter "
                              "set, default: 3"))
    parser.add_argument("-o",
                        "--output-filename",
                        type=str,
                        dest='output_filename',
                        default='out.json',
                        help=("filename to write the json data to."))
    parser.add_argument("-k",
                        "--kernel",
                        type=str,
                        default='fancy',
                        choices=("fancy", "numpy", "cython"),
                        help=("kernel to use for computation.  "
                              "Options are 'fancy', 'numpy', or 'cython'."))
    parser.add_argument(
        "-s",
        "--scaling",
        type=str,
        default="strong",
        choices=("strong", "weak"),
        help=("Kind of scaling test.  Options are 'strong' or 'weak'"))
    args = parser.parse_args()

    # Default parameters
    with closing(Context()) as context:
        # use all available targets
        engine_count_list = list(range(1, len(context.targets) + 1))
    dist_list = ['bn', 'cn', 'bb', 'cc']
    # This Julia set has many points inside, needing all iterations.
    c_list = [complex(-0.045, 0.45)]
    re_ax = (-1.5, 1.5)
    im_ax = (-1.5, 1.5)
    z_max = 2.0
    n_max = 100

    fn_from_kernel = {
        'fancy': fancy_numpy_julia_calc,
        'numpy': numpy_julia_calc
    }

    if args.kernel == 'cython':
        from kernel import cython_julia_calc
        fn_from_kernel['cython'] = cython_julia_calc

    results = do_julia_runs(args.repeat_count,
                            engine_count_list,
                            dist_list,
                            args.resolution_list,
                            c_list,
                            re_ax,
                            im_ax,
                            z_max,
                            n_max,
                            output_filename=args.output_filename,
                            kernel=fn_from_kernel[args.kernel],
                            scaling=args.scaling)
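A minimal sketch of an entry point for the cli function above, assuming it is invoked with sys.argv as the docstring describes:

if __name__ == '__main__':
    import sys
    cli(sys.argv)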
Example #6
def do_julia_runs(repeat_count,
                  engine_count_list,
                  dist_list,
                  resolution_list,
                  c_list,
                  re_ax,
                  im_ax,
                  z_max,
                  n_max,
                  output_filename,
                  kernel=fancy_numpy_julia_calc,
                  scaling="strong"):
    """Perform a series of Julia set calculations, and print the results.

    Loop over all parameter lists.

    Parameters
    ----------
    repeat_count : int
        Number of times to repeat each unique parameter set.  Later we can take
        the average or minimum of these values to reduce noise in the output.
    engine_count_list : list of int
        List of numbers of engines to test.  Example: list(range(1, 5))
    dist_list : list of 2-element sequences
        List of distribution types to test.  Example: ['bn', 'cn', 'bb', 'cc']
    resolution_list : list of int
        List of resolutions of the Julia set to test.
    c_list : list of complex
        Constants to use to compute Julia set.
        Example: [complex(-0.045, 0.45)]
    re_ax : 2-tuple of float
        Min and max for real axis.
    im_ax : 2-tuple of float
        Min and max for imaginary axis.
    z_max : float
        Magnitude at which a point is considered to have escaped to infinity.
        For the constants used here, 2.0 is sufficient to guarantee escape.
    n_max : int
        Maximum iteration count.  Points inside the set hit this limit, so
        increasing it has a large effect on the run time.
    output_filename : str
        Name of the JSON file the results are written to.
    kernel : function
        Kernel function used to compute the Julia set.
    scaling : str
        Kind of scaling test, either "strong" or "weak".
    """
    max_engine_count = max(engine_count_list)
    with closing(Context()) as context:
        # Check that we have enough engines available.
        num_engines = len(context.targets)
    if max_engine_count > num_engines:
        msg = 'Require %d engines, but only %d are available.' % (
            max_engine_count, num_engines)
        raise ValueError(msg)

    # Loop over everything and time the calculations.
    results = []
    hdr = ('Start', 'End', 'Dist', 'Resolution', 'c', 'Engines', 'Iters')
    print("(n/n_runs: time)", hdr)
    # progress stats
    n_regular_runs = repeat_count * (len(resolution_list) * len(c_list) *
                                     len(engine_count_list) * len(dist_list))
    n_numpy_runs = repeat_count * (len(resolution_list) * len(c_list))
    n_runs = n_regular_runs + n_numpy_runs
    prog_fmt = "({:d}/{:d}: {:0.3f}s)"
    n = 0
    for i in range(repeat_count):
        for resolution in resolution_list:
            dimensions = (resolution, resolution)
            for c in c_list:
                with closing(Context(targets=[0])) as context:
                    # numpy julia run
                    complex_plane = create_complex_plane(
                        context, dimensions, 'bn', re_ax, im_ax)
                    result = do_julia_run(context,
                                          'numpy',
                                          dimensions,
                                          c,
                                          complex_plane,
                                          z_max,
                                          n_max,
                                          benchmark_numpy=True,
                                          kernel=kernel)
                    results.append({h: r for h, r in zip(hdr, result)})
                    n += 1
                    print(prog_fmt.format(n, n_runs, result[1] - result[0]),
                          result)
                for engine_count in engine_count_list:
                    if scaling == "weak":
                        factor = sqrt(engine_count)
                        dimensions = (int(floor(resolution * factor)), ) * 2
                    for dist in dist_list:
                        targets = list(range(engine_count))
                        with closing(Context(targets=targets)) as context:
                            context.register(kernel)
                            complex_plane = create_complex_plane(
                                context, dimensions, dist, re_ax, im_ax)
                            result = do_julia_run(context,
                                                  dist,
                                                  dimensions,
                                                  c,
                                                  complex_plane,
                                                  z_max,
                                                  n_max,
                                                  benchmark_numpy=False,
                                                  kernel=kernel)
                            results.append({h: r for h, r in zip(hdr, result)})
                            n += 1
                            print(
                                prog_fmt.format(n, n_runs,
                                                result[1] - result[0]), result)
                            with open(output_filename, 'wt') as fp:
                                json.dump(results,
                                          fp,
                                          sort_keys=True,
                                          indent=4,
                                          separators=(',', ': '))
    return results
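In the weak-scaling branch above, the grid side is scaled by sqrt(engine_count) so the number of points per engine stays roughly constant; a small standalone sketch of that calculation (the resolution value is a placeholder):

from math import floor, sqrt

resolution = 512
for engine_count in (1, 2, 4):
    # The side grows with sqrt(engines), so the total point count grows
    # linearly with the engine count: roughly constant work per engine.
    side = int(floor(resolution * sqrt(engine_count)))
    print(engine_count, (side, side), side * side)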
Example #7
"""
Script to test launching an MPI-only client.

    $ mpiexec -np <np> python launch_mpi.py

If it exits cleanly, everything is fine.  If it exits with an error code,
there's a problem.

"""

from __future__ import print_function
from distarray.globalapi import Context, Distribution
import numpy as np

c = Context(kind='MPI')

fmt = lambda s: "{:.<25s}:".format(s)

print(fmt("Context"), c)
print(fmt("targets"), c.targets)

if __name__ == '__main__':
    size = len(c.targets) * 100
    print(fmt("size"), size)
    dist = Distribution(c, (size,))
    print(fmt("Distribution"), dist)
    da = c.ones(dist, dtype=np.int64)
    print(fmt("DistArray"), da)
    factor = 2
    db = da * factor
    print(fmt("DistArray"), db)
Example #8
# ---------------------------------------------------------------------------
#  Copyright (C) 2008-2014, IPython Development Team and Enthought, Inc.
#  Distributed under the terms of the BSD License.  See COPYING.rst.
# ---------------------------------------------------------------------------
"""
Estimate pi using a Monte Carlo method with distarray.
"""

from __future__ import division, print_function

from util import timer

from distarray.globalapi import Context, Distribution, hypot
from distarray.globalapi.random import Random

context = Context()
random = Random(context)


@timer
def calc_pi(n):
    """Estimate pi using distributed NumPy arrays."""
    distribution = Distribution(context=context, shape=(n, ))
    x = random.rand(distribution)
    y = random.rand(distribution)
    r = hypot(x, y)
    mask = (r < 1)
    return 4 * mask.sum().toarray() / n


def main(N):
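The snippet is cut off at main; a minimal hypothetical driver for calc_pi (the sample count is a placeholder, and the exact return value depends on the @timer decorator from util, which is not shown):

if __name__ == '__main__':
    # Hypothetical sample count; larger values give a better estimate.
    n_samples = 10 ** 6
    result = calc_pi(n_samples)
    print('pi is approximately:', result)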
Example #9
'''
Needs an IPython distarray cluster running:

    dacluster start -n4
'''

import time

import numpy as np

# da provides the distributed ufuncs (sin, cos) used below.
import distarray.globalapi as da
from distarray.globalapi import Context


def timeit(method):
    def timed(*args, **kw):
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()
        print('Time: %2.6f sec' % (te - ts))
        return result
    return timed


context = Context()

@timeit
def task_np(arr):
    return (np.sin(arr) + np.cos(arr)).sum(axis=1) / arr.sum(axis=2)

@timeit
def task_da(arr):
    return (da.sin(arr) + da.cos(arr)).sum(axis=1) / arr.sum(axis=2)

N = 400

np_arr = np.random.random_sample(size=(N,N,N))
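The snippet ends before the distributed array is created; a hedged sketch of how the comparison might continue, assuming Context.fromarray distributes the NumPy array (as in Example #4) with a default distribution:

# Hypothetical continuation: distribute the array and time both variants.
da_arr = context.fromarray(np_arr)
task_np(np_arr)
task_da(da_arr)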