Example #1
def _process_common(args, mesh, soln, cfg):
    # Prefork to allow us to exec processes after MPI is initialised
    if hasattr(os, 'fork'):
        from pytools.prefork import enable_prefork

        enable_prefork()

    # Import but do not initialise MPI
    from mpi4py import MPI
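    # (note: this assumes mpi4py.rc.initialize was set to False before the
    # import, as in Example #18; otherwise importing MPI initialises it
    # automatically and the MPI.Init() below would raise an exception)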

    # Manually initialise MPI
    MPI.Init()

    # Ensure MPI is suitably cleaned up
    register_finalize_handler()

    # Create a backend
    backend = get_backend(args.backend, cfg)

    # Get the mapping from physical ranks to MPI ranks
    rallocs = get_rank_allocation(mesh, cfg)

    # Construct the solver
    solver = get_solver(backend, rallocs, mesh, soln, cfg)

    # If we are running interactively then create a progress bar
    if args.progress and MPI.COMM_WORLD.rank == 0:
        pb = ProgressBar(solver.tstart, solver.tcurr, solver.tend)

        # Register a callback to update the bar after each step
        callb = lambda intg: pb.advance_to(intg.tcurr)
        solver.completed_step_handlers.append(callb)

    # Execute!
    solver.run()
Example #2
    def __init__(self):
        if not MPI.Is_initialized():
            print("Manual MPI_Init performed.")
            MPI.Init()
        self.comm = MPI.COMM_WORLD
        self.rank = self.comm.Get_rank()
        self.size = self.comm.Get_size()
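The guard above is the safe idiom when several components may race to initialise MPI. A matching guard at shutdown avoids a double Finalize; a minimal sketch of such a (hypothetical) cleanup helper, assuming mpi4py:

import atexit
from mpi4py import MPI

def _finalize_mpi():
    # Finalise only if MPI is still up, so a double Finalize cannot occur
    if MPI.Is_initialized() and not MPI.Is_finalized():
        MPI.Finalize()

atexit.register(_finalize_mpi)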
Example #3
def __main__():

    if hasattr(os, 'fork'):
        from pytools.prefork import enable_prefork
        enable_prefork()

    # Import mpi4py and manually initialise MPI
    from mpi4py import MPI
    MPI.Init()

    # Select the CUDA device based on the local rank
    print("Local rank", get_local_rank())
    os.environ.pop('CUDA_DEVICE', None)
    devid = get_local_rank()
    os.environ['CUDA_DEVICE'] = str(devid)

    # CUDA device number (used by pycuda.autoinit)
    #from pycuda.autoinit import context
    #import pycuda.autoinit
    cuda.init()
    cudadevice = cuda.Device(devid)
    cudacontext = cudadevice.make_context()

    import atexit
    atexit.register(cudacontext.pop)

    # Run the main process
    main()

    # finalize everything
    MPI.Finalize()
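The snippet assumes module-level imports (os and pycuda.driver as cuda) plus a get_local_rank() helper defined elsewhere. A minimal sketch of such a helper, assuming an MPI-3 implementation so that COMM_WORLD can be split into per-node communicators:

def get_local_rank():
    # Split COMM_WORLD into one communicator per node and return this
    # process's rank within its node, suitable as a CUDA device index
    from mpi4py import MPI
    local_comm = MPI.COMM_WORLD.Split_type(MPI.COMM_TYPE_SHARED)
    try:
        return local_comm.Get_rank()
    finally:
        local_comm.Free()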
Example #4
def _process_common(args, mesh, soln, cfg):
    # Prefork to allow us to exec processes after MPI is initialised
    if hasattr(os, 'fork'):
        from pytools.prefork import enable_prefork

        enable_prefork()

    # Import but do not initialise MPI
    from mpi4py import MPI

    # Manually initialise MPI
    MPI.Init()

    # Ensure MPI is suitably cleaned up
    register_finalize_handler()

    # Create a backend
    backend = get_backend(args.backend, cfg)

    # Get the mapping from physical ranks to MPI ranks
    rallocs = get_rank_allocation(mesh, cfg)

    # Construct the solver
    solver = get_solver(backend, rallocs, mesh, soln, cfg)

    # Execute!
    solver.run()

    # Finalise MPI
    MPI.Finalize()
Example #5
    def fit(self, training_data, validation_data=None):

        MPI.Init()
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        size = comm.Get_size()

        for epoch in range(self.epochs):

            data = training_data[0]
            labels = training_data[1]
            mini_batches = self.create_batches(data, labels,
                                               self.mini_batch_size // size)

            for x, y in mini_batches:
                # Forward and backward propagation
                self.forward_prop(x)
                ma_nabla_b, ma_nabla_w = self.back_prop(y)

                # Sum ma_nabla_b and ma_nabla_w across ranks into nabla_b and nabla_w
                nabla_w = []
                nabla_b = []
                # TODO: add your code
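                # (Example #9 below shows one completion: an allreduce such
                # as comm.Allreduce(mw, w, op=MPI.SUM) summing each layer's
                # gradients across ranks)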

                # Apply the gradient update
                self.weights = [
                    w - self.eta * dw for w, dw in zip(self.weights, nabla_w)
                ]
                self.biases = [
                    b - self.eta * db for b, db in zip(self.biases, nabla_b)
                ]

            self.print_progress(validation_data, epoch)

        MPI.Finalize()
Example #6
    def train(self,
              train_data,
              num_epochs,
              mini_batch_sz,
              learning_rate=0.01,
              test_data=None):
        X = train_data[0]
        y = train_data[1]
        num_examples = len(X)
        MPI.Init()
        self.sgd(X, y, num_examples, num_epochs, test_data, mini_batch_sz,
                 learning_rate)
        MPI.Finalize()
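Note that bracketing MPI.Init()/MPI.Finalize() inside fit()/train(), as in Examples #5 and #6, means the method can only run once per process: MPI cannot be re-initialised after it has been finalised.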
Example #7
    def __init__(self, shape, dimensions, input_comm=None, topology=None):
        super(Distributor, self).__init__(shape, dimensions)

        if configuration['mpi']:
            # First time we enter here, we make sure MPI is initialized
            if not MPI.Is_initialized():
                MPI.Init()
                global init_by_devito
                init_by_devito = True

            self._input_comm = (input_comm or MPI.COMM_WORLD).Clone()

            # Make sure the cloned communicator will be freed up upon exit
            def cleanup():
                if self._input_comm is not None:
                    self._input_comm.Free()

            atexit.register(cleanup)

            if topology is None:
                # `MPI.Compute_dims` sets the dimension sizes to be as close to each other
                # as possible, using an appropriate divisibility algorithm. Thus, in 3D:
                # * topology[0] >= topology[1] >= topology[2]
                # * topology[0] * topology[1] * topology[2] == self._input_comm.size
                # However, `MPI.Compute_dims` is distro-dependent, so we have to enforce
                # some properties through our own wrapper (e.g., OpenMPI v3 does not
                # guarantee that 9 ranks are arranged into a 3x3 grid when shape=(9, 9))
                self._topology = compute_dims(self._input_comm.size,
                                              len(shape))
            else:
                self._topology = topology

            if self._input_comm is not input_comm:
                # By default, Devito arranges processes into a cartesian topology.
                # MPI works with numbered dimensions and follows the C row-major
                # numbering of the ranks, i.e. in a 2x3 Cartesian topology (0,0)
                # maps to rank 0, (0,1) maps to rank 1, (0,2) maps to rank 2, (1,0)
                # maps to rank 3, and so on.
                self._comm = self._input_comm.Create_cart(self._topology)
            else:
                self._comm = input_comm
        else:
            self._input_comm = None
            self._comm = MPI.COMM_NULL
            self._topology = tuple(1 for _ in range(len(shape)))

        # The domain decomposition
        self._decomposition = [
            Decomposition(np.array_split(range(i), j), c)
            for i, j, c in zip(shape, self.topology, self.mycoords)
        ]
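As a concrete illustration of the decomposition step above, splitting a 1-D shape of 10 points across 2 ranks with np.array_split gives each rank one contiguous half of the index range:

import numpy as np
# -> [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]
print([a.tolist() for a in np.array_split(range(10), 2)])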
Example #8
    def fit(self, training_data, validation_data=None):
        # MPI setup
        MPI.Init()
        self.comm = MPI.COMM_WORLD
        self.rank = self.comm.Get_rank()
        self.size = self.comm.Get_size()

        self.layers_per_master = self.num_layers // self.num_masters

        # split up work
        if self.rank < self.num_masters:
            self.do_master(validation_data)
        else:
            self.do_worker(training_data)

        # when all is done
        self.comm.Barrier()
        MPI.Finalize()
Example #9
    def fit(self, training_data, validation_data=None):

        MPI.Init()
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        size = comm.Get_size()

        for epoch in range(self.epochs):

            data = training_data[0]
            labels = training_data[1]
            mini_batches = self.create_batches(data, labels,
                                               self.mini_batch_size // size)

            for x, y in mini_batches:
                # Forward and backward propagation
                self.forward_prop(x)
                ma_nabla_b, ma_nabla_w = self.back_prop(y)

                # Sum ma_nabla_b and ma_nabla_w across ranks into nabla_b and nabla_w
                nabla_w = []
                nabla_b = []

                for mw, mb in zip(ma_nabla_w, ma_nabla_b):
                    w = np.zeros_like(mw)
                    b = np.zeros_like(mb)
                    # comm.Allreduce(mw, w, op=MPI.SUM)
                    # comm.Allreduce(mb, b, op=MPI.SUM)
                    ringallreduce(mw, w, comm, _op)
                    ringallreduce(mb, b, comm, _op)
                    nabla_w.append(w)
                    nabla_b.append(b)

                # Apply the gradient update
                self.weights = [
                    w - self.eta * dw for w, dw in zip(self.weights, nabla_w)
                ]
                self.biases = [
                    b - self.eta * db for b, db in zip(self.biases, nabla_b)
                ]

            self.print_progress(validation_data, epoch)

        MPI.Finalize()
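ringallreduce and _op are defined elsewhere in that project. As a rough sketch of the idea, assuming numpy arrays and summation as the reduction, a naive (not bandwidth-optimal) ring allreduce circulates each rank's buffer once around the ring:

def ringallreduce(send, recv, comm, op=None):
    # Circulate each rank's buffer around the ring size-1 times,
    # accumulating into recv; op is assumed to be summation here
    rank, size = comm.Get_rank(), comm.Get_size()
    recv[...] = send
    buf = send.copy()
    for _ in range(size - 1):
        buf = comm.sendrecv(buf, dest=(rank + 1) % size,
                            source=(rank - 1) % size)
        recv += buf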
Example #10
    def _init_region_comm(self):
        """
        If in multi-node, this method will initialize information about MPI
        controllers.

        .. versionadded:: 0.6.0

        """
        if MPI is None:
            raise AttributeError("mpi4py is not imported")
        MPI.Init()
        self._region_comm = MPI.COMM_WORLD
        self._region_size = MPI.COMM_WORLD.Get_size()
        self._region_rank = MPI.COMM_WORLD.Get_rank()

        local_size = numpy.array([self._local_size], dtype='int32')
        self._all_local_size = numpy.zeros((self._region_size, ), dtype='int32')
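        # Gather each rank's local size into _all_local_size (one entry
        # per rank in the region communicator)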
        self._region_comm.Allgather([local_size, MPI.INT],
                                    [self._all_local_size, MPI.INT])
        self._global_size = sum(self._all_local_size)
Example #11
# Model classes must have the same name as their Python file in the models directory
models_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                           models_dir)

# import GANs classes
for filename in os.listdir(models_path):
    modulename, ext = os.path.splitext(filename)
    if modulename != '__pycache__' and ext == '.py':
        subpackage = '{0}.{1}'.format(models_dir, modulename)
        obj = getattr(
            __import__(subpackage, globals(), locals(), [modulename]),
            modulename,
        )
        list_GANs.update({obj.model_name: obj})

MPI.Init()


def merge_args(cmdline_args, config_args):
    for key in config_args.keys():
        if key not in cmdline_args:
            sys.exit(
                'Error: unknown key in the configuration file \"{}\"'.format(
                    key))

    args = {}
    args.update(cmdline_args)
    args.update(config_args)

    return args
Example #12
    def spawn(self, **kwargs):
        """
        Spawn MPI processes and execute each of the managed targets.

        Parameters
        ----------
        kwargs : dict
            Options for the `info` argument of the MPI spawn call; see
            https://www.open-mpi.org/doc/v4.0/man3/MPI_Comm_spawn.3.php
        """

        # Typically MPI must have been initialised before spawning.
        if not MPI.Is_initialized():
            MPI.Init()

        if self._is_parent:
            # Find the path to the mpi_backend.py script (which should be
            # in the same directory as this module):
            parent_dir = os.path.dirname(__file__)
            mpi_backend_path = os.path.join(parent_dir, 'mpi_backend.py')

            # Set spawn options; with --oversubscribe we use no binding
            info = MPI.Info.Create()
            info.Set('bind_to', 'none')

            for k, v in kwargs.items():
                info.Set(k, v)

            # Spawn processes:
            self._intercomm = MPI.COMM_SELF.Spawn(sys.executable,
                                                  args=[mpi_backend_path],
                                                  maxprocs=len(self),
                                                  info=info)

            # First, transmit twiggy logging emitters to spawned processes so
            # that they can configure their logging facilities:
            for i in self._targets:
                self._intercomm.send(twiggy.emitters, i)

            # Next, serialize the routing table ONCE and then transmit it to all
            # of the child nodes:
            try:
                routing_table = self.routing_table
            except Exception:
                routing_table = RoutingTable()
                self.log_warning(
                    'Routing Table is null, using empty routing table.')

            self._intercomm.bcast(routing_table, root=MPI.ROOT)
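            # (on an intercommunicator, root=MPI.ROOT marks this parent
            # process as the broadcast root; the spawned children receive)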

            # Transmit class to instantiate, globals required by the class, and
            # the constructor arguments; the backend will wait to receive
            # them and then start running the targets on the appropriate nodes.
            r_list = []
            for i in self._targets:
                target_globals = all_global_vars(self._targets[i])

                # Serializing atexit with dill appears to fail in virtualenvs
                # sometimes if atexit._exithandlers contains an unserializable function:
                if 'atexit' in target_globals:
                    del target_globals['atexit']
                data = (self._targets[i], target_globals, self._kwargs[i])
                r_list.append(self._intercomm.isend(data, i))

                # Need to clobber data to prevent all_global_vars from
                # including it in its output:
                del data
            MPI.Request.Waitall(r_list)
Example #13
def DNNT():

    # Initialise MPI and check its state
    MPI.Init()
    print(MPI.Is_initialized())
    print(MPI.Is_finalized())

    # Get Parameters
    generation, dataset, mutationChance, param = getParameters()

    # Initialize the fitness values
    fitnessParent = -1   # fitness of the parent
    fitnessChild = -1    # fitness of the child
    networkFitness = -1  # fitness of the network
    genBestFitness = -1  # fitness of the generation

    # Initialize the classes
    net, ga, com, pd = initClasses(param, MPI, networkFitness)

    # Get the logger
    # filename = 'output{}.log'.format(pd.rank)
    filename = 'output.log'
    logger = logging.getLogger()
    handler = logging.FileHandler(filename)
    handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)

    # initialize the networks
    # one random network at every processor
    data = net.initNetwork()

    # Start running GA (Genetic Algorithm) generation
    for g in range(generation):

        if genBestFitness < 100:

            # GET PARENT FITNESS/ACCURACY
            # Every processor trains and evaluates the accuracy/fitness of the parent network
            fitnessParent = ga.getFitness(data, dataset)

            # BREED THE CHILD
            # Get the parent via a non-blocking MPI Isend exchange
            child = ga.breeding(data, mutationChance,
                                pd.nonBlockingExchange(data))
            MPI.COMM_WORLD.Barrier()

            # GET CHILD'S FITNESS/ACCURACY
            # Every processor trains and evaluates the accuracy/fitness of the child network
            fitnessChild = ga.getFitness(child, dataset)
            # If the network fitness has improved over the previous
            # generation, pass on the features/hyperparameters: the better
            # of the two (parent or child) goes through to the next
            # generation. The previous value at this processor is compared
            # with the newly computed value.
            networkFitness, data = com.networkData(fitnessParent, fitnessChild,
                                                   data, child)

            logger.debug(
                'generation=%d, Rank=%d, processid=%s, parent=%s, child=%s, '
                'parentFitness=%0.4f, childFitness=%0.4f, networkFitness=%0.4f',
                g, pd.rank, socket.gethostname(), data, child, fitnessParent,
                fitnessChild, networkFitness)
            # Compare the fitness of the best networks of all the families
            # (i.e. across all processors in the communicator), get the
            # best fitness of the generation, kill the poorest performers,
            # and randomly re-initialise them to keep the population
            # constant.
            genBestFitness, data = com.genFitness(data, param, MPI)
            print(genBestFitness, data)

        else:
            # Broadcast the best results to all the processors
            pd.broadcast(data, pd.rank)
            print('best fitness achieved')
            # And stop iterating; MPI is finalised once, below
            break

    MPI.Finalize()
Example #14
def INNT():

    # Initialise MPI and check its state
    MPI.Init()
    print(MPI.Is_initialized())
    print(MPI.Is_finalized())

    # Get Parameters
    generation, dataset, mutationChance, param, groupSize = getParameters()

    # Initialize the fitness values
    fitnessParent = -1   # fitness of the parent
    fitnessChild = -1    # fitness of the child
    networkFitness = -1  # fitness of the network
    genBestFitness = -1  # fitness of the generation

    # Initialize the classes
    net, ga, com, pd = initClasses(param, MPI, groupSize, networkFitness)

    # Get the logger
    # filename = 'output{}.log'.format(pd.rank)
    filename = 'output.log'
    logger = logging.getLogger()
    handler = logging.FileHandler(filename)
    handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)

    # Split the communicator
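    # subGroup is the Split colour (one sub-communicator per island) and
    # pd.rank the key (rank ordering within each island)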
    subGroup = pd.rank // groupSize
    subComm = MPI.COMM_WORLD.Split(subGroup, pd.rank)

    # initialize the networks
    # one random network at every processor

    # Initialise each island with its own speciality
    data = net.initNetwork()

    # Islands differ in their activation function
    # (there will be at least two subgroups)
    if subGroup == 0:
        data['activation'] = 'sigmoid'
    elif subGroup == 1:
        data['activation'] = 'elu'
    else:
        data['activation'] = 'selu'

    # Start running GA (Genetic Algorithm) generation
    for g in range(generation):

        if genBestFitness < 100:

            # GET PARENT FITNESS/ACCURACY
            # Every processor trains and evaluates the accuracy/fitness of the parent network
            fitnessParent = ga.getFitness(data, dataset)
            print('loop_1 done', g, pd.rank)

            # BREED THE CHILD
            # Get the parent via a non-blocking MPI Isend exchange
            child = ga.breeding(pd.rank, g, data, mutationChance,
                                pd.intraIslandExchange(data, subComm))
            MPI.COMM_WORLD.Barrier()

            # GET CHILD'S FITNESS/ACCURACY
            # Every processor trains and evaluates the accuracy/fitness of the child network
            fitnessChild = ga.getFitness(child, dataset)
            # If the network fitness has improved over the previous
            # generation, pass on the features/hyperparameters: the better
            # of the two (parent or child) goes through to the next
            # generation. The previous value at this processor is compared
            # with the newly computed value.
            networkFitness, data = com.networkData(fitnessParent, fitnessChild,
                                                   data, child)
            # Compare the fitness of the best networks of all the families
            # (i.e. across all processors in the communicator), get the
            # best fitness of the generation, kill the poorest performers,
            # and randomly re-initialise them to keep the population
            # constant.
            genBestFitness, data = com.genFitness(data, param, MPI, groupSize)
            # print(genBestFitness, data)

            logger.debug(
                'generation=%d, Rank=%d, processid=%s, group=ID%d, subRank=%d, parent=%s, child=%s, parentFitness=%0.4f, childFitness=%0.4f, networkFitness=%0.4f, genBestFitness=%0.4f',
                g, pd.rank, socket.gethostname(), subGroup,
                subComm.Get_rank(), data, child, fitnessParent, fitnessChild,
                networkFitness, genBestFitness)
            # Do an inter-island exchange every 5 generations; every rank
            # sends its data to the previous rank.
            if g % 5 == 0:
                pd.interIslandExchange(data, subComm)
            print('loop_6 done', pd.rank)
            MPI.COMM_WORLD.Barrier()

        else:
            # Broadcast the best results to all the processors
            pd.broadcast(data, pd.rank)
            print('best fitness achieved')
            # And stop iterating; MPI is finalised once, below
            break

    MPI.Finalize()
Example #15
def init_process_group():
    if not MPI.Is_initialized():
        MPI.Init()
    global _comm
    _comm = MPI.COMM_WORLD
Example #16
def init():
    if not MPI.Is_initialized():
        MPI.Init()
Example #17
    def init(self):
        # Manually initialise MPI
        if not self.mpi_init:
            self.mpi_init = True
            MPI.Init()
Example #18
#!/usr/bin/python3
from mpi4py import rc
rc.initialize = False

from mpi4py import MPI as mpi
from time import sleep

mpi.Init()
comm = mpi.COMM_WORLD
rank = comm.Get_rank()

# if rank == 1:
#     sleep(2)

for i in range(0, 10):
    if rank == 0:
        data = {'a': i, 'b': 3.14}
        print(data)
        # sleep(1)
        req = comm.isend(data, dest=(rank + 1), tag=0)
        req.wait()  # complete the non-blocking send
    elif rank == 1:
        req = comm.irecv(source=(rank - 1), tag=0)
        print(rank, req.wait())
        # data = req.wait()
        # while 1:
        #     r = req.test()
        #     if r[0]:
        #         print(r[1])
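This snippet shows the setting the manual-initialisation examples above rely on: with mpi4py.rc.initialize = False, importing MPI no longer initialises it automatically, so the script controls when mpi.Init() runs. It would typically be launched with something like mpiexec -n 2 python3 script.py.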
Example #19
def initialize():
    if not MPI.Is_initialized():
        MPI.Init()
    global _comm
    _comm = MPI.COMM_WORLD