Example #1
0
    def _set_memory_and_cores(self):
        """
        Checks hardware limitations such as memory, # cpus and sets the recommended datachunk sizes and the
        number of cores to be used by analysis methods.

        The following attributes are (re)assigned:

        * ``_maxCpus`` - number of cores to use; always 1 unless ``_parallel`` is set
        * ``_parallel`` - forced to False when only a single core ends up being usable
        * ``_maxMemoryMB`` - value of ``get_available_memory()`` divided by 1024**2
        * ``_maxDataChunk`` - memory allowance per core (truncated to an integer)
        * ``_max_pos_per_read`` - number of rows of ``h5_main`` that fit within ``_maxDataChunk``
        """

        if self._parallel:
            # psutil.cpu_count() returns None when the number of CPUs cannot be determined. Treat that as a
            # single core instead of failing on ``None - 2``. Two cores are left free for everything else.
            num_logical_cores = psutil.cpu_count() or 1
            self._maxCpus = max(1, num_logical_cores - 2)
        else:
            self._maxCpus = 1

        # Parallel computation makes no sense with a single core
        if self._maxCpus == 1:
            self._parallel = False

        # NOTE(review): assumes get_available_memory() reports bytes - confirm against its definition
        self._maxMemoryMB = get_available_memory() / 1024**2  # in MB

        # Every core gets an equal share of the memory
        self._maxDataChunk = int(self._maxMemoryMB / self._maxCpus)

        # Now calculate the number of positions that can be stored in memory in one go.
        mb_per_position = self.h5_main.dtype.itemsize * self.h5_main.shape[
            1] / 1024.0**2

        # TODO: The size of the chunk should be determined by BOTH the computation time and memory restrictions
        self._max_pos_per_read = int(
            np.floor(self._maxDataChunk / mb_per_position))
        if self._verbose:
            print('Allowed to read {} pixels per chunk'.format(
                self._max_pos_per_read))
Example #2
0
    def _set_memory_and_cores(self, cores=None, mem=None):
        """
        Checks hardware limitations such as memory, # cpus and sets the recommended datachunk sizes and the
        number of cores to be used by analysis methods.

        Sets ``self._cores``, ``self._max_mem_mb`` and ``self._max_pos_per_read``.

        Parameters
        ----------
        cores : uint, optional
            How many cores to use for the computation. The absolute value is used and it is clipped to the
            range [1, number of logical cores].
            Default - None: all logical cores except one (two on machines with more than 4 cores), minimum 1
        mem : uint, optional
            The amount of memory in MB to use in the computation. The absolute value is used and it is capped
            at the memory that is currently available.
            Default - None: all of the available memory

        Raises
        ------
        TypeError
            If ``cores`` or ``mem`` is provided but is not an ``int``
        """
        # psutil.cpu_count() returns None when the number of CPUs cannot be determined. Fall back to a
        # single core instead of failing on the comparisons / arithmetic below. Query only once.
        num_logical_cores = psutil.cpu_count() or 1

        # Leave one core free for the rest of the system, two on larger machines
        min_free_cores = 1 + int(num_logical_cores > 4)

        if cores is None:
            self._cores = max(1, num_logical_cores - min_free_cores)
        else:
            if not isinstance(cores, int):
                raise TypeError(
                    'cores should be an integer but got: {}'.format(cores))
            cores = int(abs(cores))
            self._cores = max(1, min(num_logical_cores, cores))

        # NOTE(review): assumes get_available_memory() reports bytes - confirm against its definition
        _max_mem_mb = get_available_memory() / 1E6  # in MB
        if mem is None:
            mem = _max_mem_mb
        else:
            if not isinstance(mem, int):
                raise TypeError('mem must be a whole number')
            mem = abs(mem)

        # Never promise more memory than what is actually available
        self._max_mem_mb = min(_max_mem_mb, mem)

        # Every core gets an equal share of the memory
        max_data_chunk = self._max_mem_mb / self._cores

        # Now calculate the number of positions that can be stored in memory in one go.
        mb_per_position = self.h5_main.dtype.itemsize * self.h5_main.shape[
            1] / 1e6
        self._max_pos_per_read = int(np.floor(max_data_chunk /
                                              mb_per_position))

        if self.verbose and self.mpi_rank == 0:
            # expected to be the same for all ranks so just use this.
            print('Allowed to read {} pixels per chunk'.format(
                self._max_pos_per_read))
            print('Allowed to use up to', str(self._cores), 'cores and',
                  str(self._max_mem_mb), 'MB of memory')
Example #3
0
def rebuild_svd(h5_main, components=None, cores=None, max_RAM_mb=1024):
    """
    Rebuild the Image from the SVD results on the windows
    Optionally, only use components less than n_comp.

    Parameters
    ----------
    h5_main : hdf5 Dataset
        dataset which SVD was performed on
    components : {int, iterable of int, slice} optional
        Defines which components to keep
        Default - None, all components kept

        Input Types
        integer : Components less than the input will be kept
        length 2 iterable of integers : Integers define start and stop of component slice to retain
        other iterable of integers or slice : Selection of component indices to retain
    cores : int, optional
        How many cores should be used to rebuild
        Default - None, all but 2 cores will be used, min 1
        Within this function the value only influences the memory budget; the reconstruction itself is
        performed batch by batch in the calling process.
    max_RAM_mb : int, optional
        Maximum amount of memory to use when rebuilding, in Mb.
        Default - 1024Mb

    Returns
    -------
    rebuilt_data : HDF5 Dataset
        the rebuilt dataset

    Raises
    ------
    KeyError
        If the 'S', 'U' or 'V' datasets are missing from the latest SVD results group

    """
    # The second return value (number of components) is not needed here
    comp_slice, _ = get_component_slice(
        components, total_components=h5_main.shape[1])
    if isinstance(comp_slice, np.ndarray):
        comp_slice = list(comp_slice)
    dset_name = h5_main.name.split('/')[-1]

    # Ensuring that at least one core is available for use / 2 cores are available for other use
    max_cores = max(1, cpu_count() - 2)
    if cores is not None:
        cores = min(round(abs(cores)), max_cores)
    else:
        cores = max_cores

    # Never use more than 75% of the memory that is currently available
    max_memory = min(max_RAM_mb * 1024**2, 0.75 * get_available_memory())
    if cores != 1:
        max_memory = int(max_memory / 2)

    # Get the handles for the SVD results
    try:
        h5_svd_group = find_results_groups(h5_main, 'SVD')[-1]

        h5_S = h5_svd_group['S']
        h5_U = h5_svd_group['U']
        h5_V = h5_svd_group['V']

    except KeyError:
        raise KeyError(
            'SVD Results for {dset} were not found.'.format(dset=dset_name))

    # Only the first returned item (function that converts V to real values) is needed here
    func, _, _, _, _ = check_dtype(h5_V)

    # Calculate the size of a single batch that will fit in the available memory
    n_comps = h5_S[comp_slice].size
    mem_per_pix = (h5_U.dtype.itemsize +
                   h5_V.dtype.itemsize * h5_V.shape[1]) * n_comps
    fixed_mem = h5_main.size * h5_main.dtype.itemsize

    # ``cores`` is never None at this point, so the allowance has always been twice ``max_memory``.
    # NOTE(review): for multi-core use this merely undoes the halving above, but for a single core it
    # doubles the budget beyond ``max_RAM_mb`` - confirm the intent before changing it.
    free_mem = max_memory * 2 - fixed_mem

    # ``free_mem`` is zero or negative when the dataset itself is larger than the allowance. A batch must
    # hold at least one position for the batching below to make sense, and there is no point in
    # (reporting) batches larger than the number of positions.
    num_positions = h5_U.shape[0]
    batch_size = int(round(float(free_mem) / mem_per_pix))
    batch_size = max(1, min(batch_size, num_positions))
    batch_slices = gen_batches(num_positions, batch_size)

    print('Reconstructing in batches of {} positions.'.format(batch_size))
    print('Batchs should be {} Mb each.'.format(mem_per_pix * batch_size /
                                                1024.0**2))

    # Scale the retained rows of V by the singular values once, then loop over all batches of positions
    ds_V = np.dot(np.diag(h5_S[comp_slice]), func(h5_V[comp_slice, :]))
    rebuild = np.zeros((h5_main.shape[0], ds_V.shape[1]))
    for batch in batch_slices:
        rebuild[batch, :] += np.dot(h5_U[batch, comp_slice], ds_V)

    # Convert the real-valued result back to the data type of V
    rebuild = stack_real_to_target_dtype(rebuild, h5_V.dtype)

    print(
        'Completed reconstruction of data from SVD results.  Writing to file.')

    # Create the Group and dataset to hold the rebuild data
    rebuilt_grp = create_indexed_group(h5_svd_group, 'Rebuilt_Data')
    h5_rebuilt = write_main_dataset(rebuilt_grp,
                                    rebuild,
                                    'Rebuilt_Data',
                                    get_attr(h5_main, 'quantity'),
                                    get_attr(h5_main, 'units'),
                                    None,
                                    None,
                                    h5_pos_inds=h5_main.h5_pos_inds,
                                    h5_pos_vals=h5_main.h5_pos_vals,
                                    h5_spec_inds=h5_main.h5_spec_inds,
                                    h5_spec_vals=h5_main.h5_spec_vals,
                                    chunks=h5_main.chunks,
                                    compression=h5_main.compression)

    # Record which components went into this reconstruction
    if isinstance(comp_slice, slice):
        rebuilt_grp.attrs['components_used'] = '{}-{}'.format(
            comp_slice.start, comp_slice.stop)
    else:
        rebuilt_grp.attrs['components_used'] = components

    copy_attributes(h5_main, h5_rebuilt, skip_refs=False)

    h5_main.file.flush()

    print('Done writing reconstructed data to file.')

    return h5_rebuilt
Example #4
0
 def test_get_available_memory_rerouting(self):
     """
     io_utils.get_available_memory should emit a FutureWarning (only checked on python 3) and report
     the same value as comp_utils.get_available_memory.
     """
     running_python_3 = sys.version_info.major == 3
     if running_python_3:
         with self.assertWarns(FutureWarning):
             io_utils.get_available_memory()

     reference_value = comp_utils.get_available_memory()
     rerouted_value = io_utils.get_available_memory()
     self.assertEqual(reference_value, rerouted_value)
Example #5
0
    def compute(self, override=False, *args, **kwargs):
        """
        Creates placeholders for the results, applies the unit computation to chunks of the dataset

        The data is processed chunk by chunk: each chunk is computed via ``_unit_computation()``, written via
        ``_write_results_chunk()``, flushed to file, and marked as completed in the status dataset before
        the next chunk is read via ``_read_data_chunk()``.

        Parameters
        ----------
        override : bool, optional. default = False
            By default, compute will simply return duplicate results to avoid recomputing or resume computation on a
            group with partial results. Set to True to force fresh computation.
        args : list
            arguments to the mapped function in the correct order
        kwargs : dictionary
            keyword arguments to the mapped function

        Returns
        -------
        h5_results_grp : h5py.Group object
            Group containing all the results
        """
        class SimpleFIFO(object):
            """
            Simple class that maintains a moving average of some numbers.
            """
            def __init__(self, length=5):
                """
                Create a SimpleFIFO object

                Parameters
                ----------
                length : unsigned integer
                    Number of values that need to be maintained for the moving average
                """
                self.__queue = list()
                if not isinstance(length, int):
                    raise TypeError('length must be a positive integer')
                if length <= 0:
                    raise ValueError('length must be a positive integer')
                self.__max_length = length
                self.__count = 0

            def put(self, item):
                """
                Adds the item to the internal queue. If the size of the queue exceeds its capacity, the oldest
                item is removed.

                Parameters
                ----------
                item : float or int
                    Any real valued number
                """
                if (not isinstance(item, Number)) or isinstance(item, complex):
                    raise TypeError(
                        'Provided item: {} is not a Number'.format(item))
                self.__queue.append(item)
                self.__count += 1
                if len(self.__queue) > self.__max_length:
                    _ = self.__queue.pop(0)

            def get_mean(self):
                """
                Returns the average of the elements within the queue

                Returns
                -------
                avg : number.Number
                    Mean of all elements within the queue
                """
                return np.mean(self.__queue)

            def get_cycles(self):
                """
                Returns the number of items that have been added to the queue in total

                Returns
                -------
                count : int
                    number of items that have been added to the queue in total
                """
                return self.__count

        if not override:
            if len(self.duplicate_h5_groups) > 0:
                # Identical computation was performed already - hand back the latest results
                if self.mpi_rank == 0:
                    print('Returned previously computed results at ' +
                          self.duplicate_h5_groups[-1].name)
                return self.duplicate_h5_groups[-1]
            elif len(self.partial_h5_groups) > 0:
                if self.mpi_rank == 0:
                    print('Resuming computation in group: ' +
                          self.partial_h5_groups[-1].name)
                self.use_partial_computation()

        resuming = False
        if self.h5_results_grp is None:
            # starting fresh
            if self.verbose and self.mpi_rank == 0:
                print('Creating HDF5 group and datasets to hold results')
            self._create_results_datasets()
        else:
            # resuming from previous checkpoint
            resuming = True
            self._get_existing_datasets()

        self.__create_compute_status_dataset()

        if resuming and self.mpi_rank == 0:
            # Completed positions are marked with 1 in the status dataset (see the end of the loop below), so
            # the entries that are still 0 are the PENDING ones and must not be counted as completed.
            num_pending = len(np.where(self._h5_status_dset[()] == 0)[0])
            num_total = self._h5_status_dset.shape[0]
            percent_complete = int(100 * (num_total - num_pending) / num_total)
            print('Resuming computation. {}% completed already'.format(
                percent_complete))

        self.__assign_job_indices()

        # Not sure if this is necessary but I don't think it would hurt either
        if self.mpi_comm is not None:
            self.mpi_comm.barrier()

        # Moving averages (per position) used for estimating the time remaining
        compute_times = SimpleFIFO(5)
        write_times = SimpleFIFO(5)
        orig_rank_start = self.__start_pos

        if self.mpi_rank == 0 and self.mpi_size == 1:
            if self.__resume_implemented:
                print(
                    '\tThis class (likely) supports interruption and resuming of computations!\n'
                    '\tIf you are operating in a python console, press Ctrl+C or Cmd+C to abort\n'
                    '\tIf you are in a Jupyter notebook, click on "Kernel">>"Interrupt"\n'
                    '\tIf you are operating on a cluster and your job gets killed, re-run the job to resume\n'
                )
            else:
                print(
                    '\tThis class does NOT support interruption and resuming of computations.\n'
                    '\tIn order to enable this feature, simply implement the _get_existing_datasets() function'
                )

        if self.verbose and self.mpi_rank == self.__socket_master_rank:
            print('Rank: {} - with nothing loaded has {} free memory'
                  ''.format(self.mpi_rank,
                            format_size(get_available_memory())))

        self._read_data_chunk()

        if self.mpi_comm is not None:
            self.mpi_comm.barrier()

        if self.verbose and self.mpi_rank == self.__socket_master_rank:
            print('Rank: {} - with only raw data loaded has {} free memory'
                  ''.format(self.mpi_rank,
                            format_size(get_available_memory())))

        # The loop ends once self.data is None after a call to _read_data_chunk()
        while self.data is not None:

            num_jobs_in_batch = self.__end_pos - self.__start_pos

            t_start_1 = tm.time()

            self._unit_computation(*args, **kwargs)

            comp_time = np.round(tm.time() - t_start_1,
                                 decimals=2)  # in seconds
            time_per_pix = comp_time / num_jobs_in_batch
            compute_times.put(time_per_pix)

            if self.verbose:
                print(
                    'Rank {} - computed chunk in {} or {} per pixel. Average: {} per pixel'
                    '.'.format(self.mpi_rank, format_time(comp_time),
                               format_time(time_per_pix),
                               format_time(compute_times.get_mean())))

            # Ranks can become memory starved. Check memory usage - raw data + results in memory at this point
            if self.verbose and self.mpi_rank == self.__socket_master_rank:
                print(
                    'Rank: {} - now holding onto raw data + results has {} free memory'
                    ''.format(self.mpi_rank,
                              format_size(get_available_memory())))

            t_start_2 = tm.time()
            self._write_results_chunk()

            # NOW, update the positions. Users are NOT allowed to touch start and end pos
            self.__start_pos = self.__end_pos
            # Leaving in this provision that will allow restarting of processes
            if self.mpi_size == 1:
                self.h5_results_grp.attrs['last_pixel'] = self.__end_pos
            # Child classes don't even have to worry about flushing. Process will do it.
            self.h5_main.file.flush()

            dump_time = np.round(tm.time() - t_start_2, decimals=2)
            write_times.put(dump_time / num_jobs_in_batch)

            if self.verbose:
                print('Rank {} - wrote its {} pixel chunk in {}'.format(
                    self.mpi_rank, num_jobs_in_batch, format_time(dump_time)))

            # Remaining positions for this rank x average time to compute + write a single position
            time_remaining = (self.__rank_end_pos - self.__end_pos) * \
                             (compute_times.get_mean() + write_times.get_mean())

            if self.verbose or self.mpi_rank == 0:
                percent_complete = int(100 *
                                       (self.__end_pos - orig_rank_start) /
                                       (self.__rank_end_pos - orig_rank_start))
                print('Rank {} - {}% complete. Time remaining: {}'.format(
                    self.mpi_rank, percent_complete,
                    format_time(time_remaining)))

            # All ranks should mark the pixels for this batch as completed. 'last_pixel' attribute will be updated later
            # Setting each section to 1 independently
            for section in to_ranges(self.__pixels_in_batch):
                self._h5_status_dset[section[0]:section[1] + 1] = 1

            self._read_data_chunk()

        if self.verbose:
            print('Rank {} - Finished computing all jobs!'.format(
                self.mpi_rank))

        # Wait for every rank to finish before declaring the computation complete
        if self.mpi_comm is not None:
            self.mpi_comm.barrier()

        if self.mpi_rank == 0:
            print('Finished processing the entire dataset!')

        # Update the legacy 'last_pixel' attribute here:
        if self.mpi_rank == 0:
            self.h5_results_grp.attrs['last_pixel'] = self.h5_main.shape[0]

        return self.h5_results_grp
Example #6
0
    def _set_memory_and_cores(self, cores=None, mem=None):
        """
        Checks hardware limitations such as memory, # cpus and sets the recommended datachunk sizes and the
        number of cores to be used by analysis methods. This function can work with clusters with heterogeneous
        memory sizes (e.g. CADES SHPC Condo).

        Sets ``self._cores``, ``self._max_mem_mb``, ``self._max_pos_per_read`` as well as the private
        socket master rank and number of ranks on the socket.

        Parameters
        ----------
        cores : uint, optional
            How many cores to use for the computation. The absolute value is used and it is clipped to the
            range [1, number of logical cores]. Ignored when MPI is available - each rank then uses 1 core.
            Default - None: all logical cores except one (two on machines with more than 4 cores), minimum 1
        mem : uint, optional
            The amount of memory in MB to use in the computation. The absolute value is used and it is capped
            at the memory that is currently available. Ignored when MPI is available - all of the available
            memory is then used.
            Default - None: all of the available memory

        Raises
        ------
        TypeError
            If ``cores`` (without MPI only) or ``mem`` is provided but is not an ``int``
        """
        if MPI is None:
            # psutil.cpu_count() returns None when the number of CPUs cannot be determined. Fall back to a
            # single core instead of failing on the comparisons / arithmetic below. Query only once.
            num_logical_cores = psutil.cpu_count() or 1

            # Leave one core free for the rest of the system, two on larger machines
            min_free_cores = 1 + int(num_logical_cores > 4)

            if cores is None:
                self._cores = max(1, num_logical_cores - min_free_cores)
            else:
                if not isinstance(cores, int):
                    raise TypeError(
                        'cores should be an integer but got: {}'.format(cores))
                cores = int(abs(cores))
                self._cores = max(1, min(num_logical_cores, cores))

            # Without MPI, this process is the only one on the socket
            self.__socket_master_rank = 0
            self.__ranks_on_socket = 1
        else:
            # user-provided input cores will simply be ignored in an effort to use the entire CPU
            ranks_by_socket = group_ranks_by_socket(verbose=self.verbose)
            self.__socket_master_rank = ranks_by_socket[self.mpi_rank]
            # which ranks in this socket?
            ranks_on_this_socket = np.where(
                ranks_by_socket == self.__socket_master_rank)[0]
            # how many in this socket?
            self.__ranks_on_socket = ranks_on_this_socket.size
            # Force usage of all available memory
            mem = None
            self._cores = 1
            # Disabling the following line since mpi4py and joblib didn't play well for Bayesian Inference
            # self._cores = self.__cores_per_rank = psutil.cpu_count() // self.__ranks_on_socket

        # TODO: Convert all to bytes!
        # NOTE(review): assumes get_available_memory() reports bytes - confirm against its definition
        _max_mem_mb = get_available_memory() / 1024**2  # in MB
        if mem is None:
            mem = _max_mem_mb
        else:
            if not isinstance(mem, int):
                raise TypeError('mem must be a whole number')
            mem = abs(mem)

        # Never promise more memory than what is actually available
        self._max_mem_mb = min(_max_mem_mb, mem)

        # Remember that multiple processes (either via MPI or joblib) will share this socket
        max_data_chunk = self._max_mem_mb / (self._cores *
                                             self.__ranks_on_socket)

        # Now calculate the number of positions OF RAW DATA ONLY that can be stored in memory in one go PER RANK
        mb_per_position = self.h5_main.dtype.itemsize * self.h5_main.shape[
            1] / 1024**2
        self._max_pos_per_read = int(np.floor(max_data_chunk /
                                              mb_per_position))

        if self.verbose and self.mpi_rank == self.__socket_master_rank:
            # expected to be the same for all ranks so just use this.
            print(
                'Rank {} - on socket with {} logical cores and {} avail. RAM shared by {} ranks each given {} cores'
                '.'.format(self.__socket_master_rank, psutil.cpu_count(),
                           format_size(_max_mem_mb * 1024**2, 2),
                           self.__ranks_on_socket, self._cores))
            print('Allowed to read {} pixels per chunk'.format(
                self._max_pos_per_read))