Example 1
    def test_standard(self):
        try:
            from mpi4py import MPI
            if MPI.COMM_WORLD.Get_size() == 1:
                # mpi4py available but NOT called via mpirun
                MPI = None
        except ImportError:
            # mpi4py not even present! Single node by default:
            MPI = None

        self.assertAlmostEqual(comp_utils.get_MPI(), MPI)
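
For context, here is a minimal sketch of what `comp_utils.get_MPI()` is expected to do, consistent with the behaviour this test asserts (the actual pyUSID implementation may differ): return the `mpi4py.MPI` module only when the script runs under more than one MPI rank, and `None` otherwise.

def get_MPI():
    # Sketch consistent with the test above: return the mpi4py.MPI module
    # only when launched via mpirun / mpiexec with more than one rank.
    try:
        from mpi4py import MPI
    except ImportError:
        # mpi4py not installed: single-node computation by default
        return None
    if MPI.COMM_WORLD.Get_size() == 1:
        # mpi4py available but the script was not launched via mpirun
        return None
    return MPI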
Example 2
    def _unit_computation(self):
        if self.verbose and self.mpi_rank == 0:
            print("Rank {} at custom _unit_computation".format(self.mpi_rank))

        resp_2d_list, dc_vec_list = self.data

        req_cores = self._cores
        MPI = get_MPI()
        if MPI is not None:
            rank = MPI.COMM_WORLD.Get_rank()
            cores = 1
        else:
            rank = 0
            cores = self._cores

        if self.verbose:
            print(
                'Rank {} starting computing on {} cores (requested {} cores)'.
                format(rank, cores, req_cores))

        if cores > 1:
            values = []
            for loops_2d, curr_vdc in zip(resp_2d_list, dc_vec_list):
                values += [
                    joblib.delayed(self._map_function)(x, [curr_vdc])
                    for x in loops_2d
                ]
            results = joblib.Parallel(n_jobs=cores)(values)

            # Finished the parallel computation over the entire batch
            print('Rank {} finished parallel computation'.format(rank))

        else:
            if self.verbose:
                print("Rank {} computing serially ...".format(rank))
            # List comprehension vs map vs for loop?
            # https://stackoverflow.com/questions/1247486/python-list-comprehension-vs-map
            results = []
            for loops_2d, curr_vdc in zip(resp_2d_list, dc_vec_list):
                results += [
                    self._map_function(vector, curr_vdc) for vector in loops_2d
                ]

        self._results = results
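
The parallel branch above uses joblib's standard pattern: `joblib.delayed(func)(args)` packages a lazy call, and `joblib.Parallel(n_jobs=cores)(list_of_calls)` executes the whole list across worker processes. A self-contained toy illustration of the same pattern (the function and data here are invented for demonstration only):

import joblib

def toy_map_function(x, offsets):
    # stands in for self._map_function(x, [curr_vdc])
    return x ** 2 + sum(offsets)

data_chunks = [[1, 2, 3], [4, 5]]    # plays the role of resp_2d_list
offsets_per_chunk = [[0], [10]]      # plays the role of dc_vec_list

tasks = []
for chunk, offsets in zip(data_chunks, offsets_per_chunk):
    tasks += [joblib.delayed(toy_map_function)(x, offsets) for x in chunk]

results = joblib.Parallel(n_jobs=2)(tasks)
print(results)  # [1, 4, 9, 26, 35]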
Example 3
    def __init__(self, h5_main, cores=None, max_mem_mb=4 * 1024, verbose=False):
        """
        Parameters
        ----------
        h5_main : :class:`~pyUSID.io.usi_data.USIDataset`
            The USID main HDF5 dataset over which the analysis will be performed.
        cores : uint, optional
            How many cores to use for the computation. Default: all available cores - 2 if operating outside MPI context
        max_mem_mb : uint, optional
            How much memory (in MB) to use for the computation. Default: 4096 MB (4 * 1024)
        verbose : bool, optional
            Whether or not to print debugging statements. Default: False
        """
        super(Process, self).__init__(h5_main, verbose=verbose)

        MPI = get_MPI()

        if MPI is not None:
            # If we got here, the user has intentionally asked for multi-node computation
            comm = MPI.COMM_WORLD
            self.mpi_comm = comm
            self.mpi_rank = comm.Get_rank()
            self.mpi_size = comm.Get_size()

            if verbose:
                print("Rank {} of {} on {} sees {} logical cores on the socket".format(comm.Get_rank(), comm.Get_size(),
                                                                                       MPI.Get_processor_name(),
                                                                                       cpu_count()))

            # Use all logical cores on the node; there is no point in being economical / considerate under MPI
            cores = psutil.cpu_count()

            # It is sufficient if just one rank checks all this.
            if self.mpi_rank == 0:
                print('Working on {} ranks via MPI'.format(self.mpi_size))

            # Ensure that the file was opened with the MPI-I/O driver
            if h5_main.file.driver != 'mpio':
                raise TypeError('The HDF5 file should have been opened with driver="mpio". Current driver = "{}"'
                                ''.format(h5_main.file.driver))

            """
            # Not sure how to check for this correctly
            messg = None
            try:
                if h5_main.file.comm != comm:
                    messg = 'The HDF5 file should have been opened with comm=MPI.COMM_WORLD. Currently comm={}'
                            ''.format(h5_main.file.comm)
            except AttributeError:
                messg = 'The HDF5 file should have been opened with comm=MPI.COMM_WORLD'
            if messg is not None:
                raise TypeError(messg)
            """

        else:
            if verbose:
                print('No mpi4py found or script was not called via mpiexec / mpirun. '
                      'Assuming single node computation')
            self.mpi_comm = None
            self.mpi_size = 1
            self.mpi_rank = 0

        # self.verbose = verbose and self.mpi_rank == 0

        if MPI is not None:
            MPI.COMM_WORLD.barrier()
        # Not sure if we need a barrier here.

        # Saving these as properties of the object:
        self._cores = None
        self.__ranks_on_socket = 1
        self.__socket_master_rank = 0
        self._max_pos_per_read = None
        self._max_mem_mb = None

        self._set_memory_and_cores(cores=cores, mem=max_mem_mb)

        # We have to be careful here since the properties below are a function of the MPI rank
        self.__start_pos = None
        self.__rank_end_pos = None
        self.__end_pos = None
        self.__pixels_in_batch = None

        # Determine the max size of the data that can be put into memory.
        # All ranks go through this and they need to have this value anyway.
        self.duplicate_h5_groups = []
        self.partial_h5_groups = []
        self.process_name = None  # Reset this in the extended classes
        self.parms_dict = None
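
Note that when running under MPI, the constructor above rejects files that were not opened with the parallel HDF5 driver. Below is a sketch of how a caller might open the file before instantiating a Process subclass; the file name, dataset path, and subclass name are placeholders, and h5py must be built against parallel HDF5 for driver='mpio' to be available.

from mpi4py import MPI
import h5py

# Placeholder file name; requires an h5py build with parallel HDF5 support
h5_file = h5py.File('results.h5', mode='r+', driver='mpio', comm=MPI.COMM_WORLD)
h5_main = h5_file['Measurement_000/Channel_000/Raw_Data']  # hypothetical dataset path
# proc = MyProcessSubclass(h5_main, verbose=True)
# Launch with, e.g.:  mpirun -n 4 python my_script.py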