Example #1
    def _calc_raw(self):
        """

        Returns
        -------

        """
        mem_per_pix = self.n_sho_bins * self.h5_sho_fit.dtype.itemsize + self.n_spec_bins * self.h5_raw.dtype.itemsize

        free_mem = get_available_memory()
        batch_size = int(free_mem / mem_per_pix)
        batches = gen_batches(self.n_pixels, batch_size)

        w_vec = self.h5_spec_vals[self.h5_raw.spec_dim_labels.index(
            'Frequency')].squeeze()
        w_vec = w_vec[:self.n_bins]

        for pix_batch in batches:
            sho_chunk = self.h5_sho_fit[pix_batch, :].flatten()

            raw_data = np.zeros([sho_chunk.shape[0], self.n_bins],
                                dtype=np.complex64)
            for iparm, sho_parms in enumerate(sho_chunk):
                raw_data[iparm, :] = SHOfunc(sho_parms, w_vec)

            self.h5_raw[pix_batch, :] = raw_data.reshape(
                [-1, self.n_spec_bins])

            self.h5_file.flush()

        return
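The core pattern in Example #1 is memory-aware batching: the chunk size is derived from the per-pixel footprint and the RAM that is currently free. Below is a standalone sketch of the same idea under a few assumptions: the function and array names (``apply_in_batches``, ``data_2d``, ``func``) are hypothetical, ``gen_batches`` is assumed to be ``sklearn.utils.gen_batches`` (which yields ``slice`` objects), and ``get_available_memory`` is assumed to be importable from pyUSID's ``comp_utils`` module as in Example #8.

import numpy as np
from sklearn.utils import gen_batches  # assumed source of gen_batches; yields slice objects
from pyUSID.processing.comp_utils import get_available_memory  # assumed import path

def apply_in_batches(func, data_2d):
    """Apply ``func`` (maps an (n, m) chunk to n values) row-wise without exhausting RAM. Illustrative only."""
    bytes_per_row = data_2d.dtype.itemsize * data_2d.shape[1]
    # size each batch so that a single chunk comfortably fits in free memory
    batch_size = max(1, int(get_available_memory() // bytes_per_row))
    out = np.empty(data_2d.shape[0], dtype=float)
    for batch in gen_batches(data_2d.shape[0], batch_size):
        out[batch] = func(data_2d[batch])  # process one memory-safe chunk at a time
    return out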
Example #2
    def test_standard(self):
        from psutil import virtual_memory
        mem = virtual_memory().available

        if sys.maxsize <= 2 ** 32:
            mem = min([mem, sys.maxsize])

        self.assertTrue(abs(mem-comp_utils.get_available_memory()) < 0.01 * virtual_memory().total)
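Example #2 pins down the contract of ``get_available_memory()``: it should report psutil's available RAM, capped at ``sys.maxsize`` on 32-bit Python builds. A minimal implementation consistent with that test (a sketch, not necessarily the library's exact code) would look like this:

import sys
from psutil import virtual_memory

def get_available_memory():
    # available system RAM in bytes, capped at sys.maxsize on 32-bit interpreters
    mem = virtual_memory().available
    if sys.maxsize <= 2 ** 32:
        mem = min(mem, sys.maxsize)
    return mem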
Example #3
    def _read_data_chunk(self):
        """
        Reads a chunk of data for the intended computation into memory
        """
        if self.__start_pos < self.__rank_end_pos:
            self.__end_pos = int(
                min(self.__rank_end_pos,
                    self.__start_pos + self._max_pos_per_read))

            # Do NOT apply the start / end indices directly to the h5 dataset;
            # translate them into the actual job (pixel) indices first
            self.__pixels_in_batch = \
                self.__compute_jobs[self.__start_pos:self.__end_pos]

            if self.verbose:
                print('Rank {} will read positions: {}'.format(
                    self.mpi_rank, self.__pixels_in_batch))
                bytes_this_read = self.__bytes_per_pos * len(
                    self.__pixels_in_batch)
                print('Rank {} will read {} of the SOURCE dataset'
                      '.'.format(self.mpi_rank, format_size(bytes_this_read)))
                if self.mpi_rank == self.__socket_master_rank:
                    tot_workers = self.__ranks_on_socket * self._cores
                    print('Rank: {} available memory: {}. '
                          '{} workers on this socket will in total read ~ {}'
                          '.'.format(
                              self.mpi_rank,
                              format_size(get_available_memory()), tot_workers,
                              format_size(bytes_this_read * tot_workers)))

            # Reading as Dask array to minimize memory copies when restructuring in child classes
            if self.__lazy:
                main_dset = lazy_load_array(self.h5_main)
            else:
                main_dset = self.h5_main

            self.data = main_dset[self.__pixels_in_batch, :]
            # Do NOT update the start position here; compute() advances it
            # only after the results chunk has been written

        else:
            if self.verbose:
                print('Rank {} - Finished reading all data!'.format(
                    self.mpi_rank))
            self.data = None
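Example #3 optionally reads the chunk as a Dask array so that slicing stays lazy and memory copies are deferred until the data is actually needed. The ``lazy_load_array`` helper presumably wraps something along these lines; the file and dataset names below are hypothetical.

import dask.array as da
import h5py

with h5py.File('measurement.h5', 'r') as h5_f:            # hypothetical file
    h5_dset = h5_f['Measurement_000/Raw_Data']             # hypothetical dataset path
    lazy_dset = da.from_array(h5_dset, chunks='auto')      # nothing is read yet
    subset = lazy_dset[0:128, :]                           # still lazy
    data = subset.compute()                                # data is only read here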
Example #4
    def _check_available_mem(self):
        """
        Check that there is enough memory to perform the SVD decomposition.
        
        :raise: MemoryError if not enough memory found
        
        :returns: True is enough memory found, False otherwise.
        :rtype: bool
            

        """
        if self.verbose:
            print('Checking memory availability.')
        n_samples, n_features = self.h5_main.shape
        s_mem_per_comp = np.float32(0).itemsize
        u_mem_per_comp = np.float32(0).itemsize * n_samples
        v_mem_per_comp = self.h5_main.dtype.itemsize * n_features

        mem_per_comp = s_mem_per_comp + u_mem_per_comp + v_mem_per_comp
        max_mem = get_available_memory()
        avail_mem = 0.75 * max_mem
        free_mem = avail_mem - self.h5_main.__sizeof__()

        if free_mem <= 0:
            error_message = 'Cannot load main dataset into memory.\n' + \
                            'Available memory is {}.  Dataset needs {}.'.format(avail_mem,
                                                                                self.h5_main.__sizeof__())
            raise MemoryError(error_message)

        if self.verbose:
            print('Memory available for SVD is {}.'.format(free_mem))
            print('Memory needed per component is {}.'.format(mem_per_comp))

        cant_svd = (free_mem - self.num_components * mem_per_comp) <= 0

        if cant_svd:
            # np.floor does not accept dtype=int; cast explicitly instead
            max_comps = int(np.floor(free_mem / mem_per_comp))
            error_message = 'Not enough free memory to perform SVD with the requested number of components.\n' + \
                            'Maximum possible number of components is {}.'.format(max_comps)
            raise MemoryError(error_message)
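The memory estimate in Example #4 is easy to sanity-check by hand. With hypothetical dataset dimensions (10,000 positions by 4,096 spectral points, float32 source data):

import numpy as np

n_samples, n_features = 10_000, 4_096                          # hypothetical dataset shape
s_mem_per_comp = np.float32(0).itemsize                        # 4 bytes per singular value
u_mem_per_comp = np.float32(0).itemsize * n_samples            # 40,000 bytes per column of U
v_mem_per_comp = np.dtype(np.float32).itemsize * n_features    # 16,384 bytes per row of V
mem_per_comp = s_mem_per_comp + u_mem_per_comp + v_mem_per_comp
print(mem_per_comp)   # 56,388 bytes needed per requested SVD component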
Example #5
    def compute(self, override=False, *args, **kwargs):
        """
        Creates placeholders for the results, applies the :meth:`~pyUSID.processing.process.Process._unit_computation`
        to chunks of the dataset

        Parameters
        ----------
        override : bool, optional. default = False
            By default, compute will return previously computed (duplicate) results to avoid recomputing, or resume
            computation on a group with partial results. Set to True to force a fresh computation.
        args : list
            arguments to the mapped function in the correct order
        kwargs : dict
            keyword arguments to the mapped function

        Returns
        -------
        h5_results_grp : :class:`h5py.Group`
            Group containing all the results
        """
        class SimpleFIFO(object):
            """
            Simple class that maintains a moving average of some numbers.
            """
            def __init__(self, length=5):
                """
                Create a SimpleFIFO object

                Parameters
                ----------
                length : int
                    Number of most recent values to retain for the moving average
                """
                self.__queue = list()
                if not isinstance(length, int):
                    raise TypeError('length must be a positive integer')
                if length <= 0:
                    raise ValueError('length must be a positive integer')
                self.__max_length = length
                self.__count = 0

            def put(self, item):
                """
                Adds the item to the internal queue. If the size of the queue exceeds its capacity, the oldest
                item is removed.

                Parameters
                ----------
                item : float or int
                    Any real valued number
                """
                if (not isinstance(item, Number)) or isinstance(item, complex):
                    raise TypeError(
                        'Provided item: {} is not a Number'.format(item))
                self.__queue.append(item)
                self.__count += 1
                if len(self.__queue) > self.__max_length:
                    _ = self.__queue.pop(0)

            def get_mean(self):
                """
                Returns the average of the elements within the queue

                Returns
                -------
                avg : numbers.Number
                    Mean of all elements within the queue
                """
                return np.mean(self.__queue)

            def get_cycles(self):
                """
                Returns the number of items that have been added to the queue in total

                Returns
                -------
                count : int
                    number of items that have been added to the queue in total
                """
                return self.__count

        if not override:
            if len(self.duplicate_h5_groups) > 0:
                if self.mpi_rank == 0:
                    print('Returned previously computed results at ' +
                          self.duplicate_h5_groups[-1].name)
                self.h5_results_grp = self.duplicate_h5_groups[-1]
                return self.duplicate_h5_groups[-1]
            elif len(self.partial_h5_groups) > 0 and self.h5_results_grp is None:
                if self.mpi_rank == 0:
                    print('Resuming computation in group: ' +
                          self.partial_h5_groups[-1].name)
                self.use_partial_computation()

        resuming = False
        if self.h5_results_grp is None:
            # starting fresh
            if self.verbose and self.mpi_rank == 0:
                print('Creating HDF5 group and datasets to hold results')
            self._create_results_datasets()
            self._write_source_dset_provenance()
        else:
            # resuming from previous checkpoint
            resuming = True
            self._get_existing_datasets()

        self.__create_compute_status_dataset()

        if resuming and self.mpi_rank == 0:
            percent_complete = int(
                100 * len(np.where(self._h5_status_dset[()] == 1)[0]) /
                self._h5_status_dset.shape[0])
            print('Resuming computation. {}% completed already'.format(
                percent_complete))

        self.__assign_job_indices()

        # Not sure if this is necessary but I don't think it would hurt either
        if self.mpi_comm is not None:
            self.mpi_comm.barrier()

        compute_times = SimpleFIFO(5)
        write_times = SimpleFIFO(5)
        orig_rank_start = self.__start_pos

        if self.mpi_rank == 0 and self.mpi_size == 1:
            if self.__resume_implemented:
                print(
                    '\tThis class (likely) supports interruption and resuming of computations!\n'
                    '\tIf you are operating in a python console, press Ctrl+C or Cmd+C to abort\n'
                    '\tIf you are in a Jupyter notebook, click on "Kernel">>"Interrupt"\n'
                    '\tIf you are operating on a cluster and your job gets killed, re-run the job to resume\n'
                )
            else:
                print(
                    '\tThis class does NOT support interruption and resuming of computations.\n'
                    '\tIn order to enable this feature, simply implement the _get_existing_datasets() function'
                )

        if self.verbose and self.mpi_rank == self.__socket_master_rank:
            print('Rank: {} - with nothing loaded has {} free memory'
                  ''.format(self.mpi_rank,
                            format_size(get_available_memory())))

        self._read_data_chunk()

        if self.mpi_comm is not None:
            self.mpi_comm.barrier()

        if self.verbose and self.mpi_rank == self.__socket_master_rank:
            print('Rank: {} - with only raw data loaded has {} free memory'
                  ''.format(self.mpi_rank,
                            format_size(get_available_memory())))

        while self.data is not None:

            num_jobs_in_batch = self.__end_pos - self.__start_pos

            t_start_1 = tm.time()

            self._unit_computation(*args, **kwargs)

            comp_time = np.round(tm.time() - t_start_1,
                                 decimals=2)  # in seconds
            time_per_pix = comp_time / num_jobs_in_batch
            compute_times.put(time_per_pix)

            if self.verbose:
                print(
                    'Rank {} - computed chunk in {} or {} per pixel. Average: {} per pixel'
                    '.'.format(self.mpi_rank, format_time(comp_time),
                               format_time(time_per_pix),
                               format_time(compute_times.get_mean())))

            # Ranks can become memory starved. Check memory usage - raw data + results in memory at this point
            if self.verbose and self.mpi_rank == self.__socket_master_rank:
                print(
                    'Rank: {} - now holding onto raw data + results has {} free memory'
                    ''.format(self.mpi_rank,
                              format_size(get_available_memory())))

            t_start_2 = tm.time()
            self._write_results_chunk()

            # NOW, update the positions. Users are NOT allowed to touch start and end pos
            self.__start_pos = self.__end_pos
            # Leaving in this provision that will allow restarting of processes
            if self.mpi_size == 1:
                self.h5_results_grp.attrs['last_pixel'] = self.__end_pos
            # Child classes don't even have to worry about flushing. Process will do it.
            self.h5_main.file.flush()

            dump_time = np.round(tm.time() - t_start_2, decimals=2)
            write_times.put(dump_time / num_jobs_in_batch)

            if self.verbose:
                print('Rank {} - wrote its {} pixel chunk in {}'.format(
                    self.mpi_rank, num_jobs_in_batch, format_time(dump_time)))

            time_remaining = (self.__rank_end_pos - self.__end_pos) * \
                             (compute_times.get_mean() + write_times.get_mean())

            if self.verbose or self.mpi_rank == 0:
                percent_complete = int(100 *
                                       (self.__end_pos - orig_rank_start) /
                                       (self.__rank_end_pos - orig_rank_start))
                print('Rank {} - {}% complete. Time remaining: {}'.format(
                    self.mpi_rank, percent_complete,
                    format_time(time_remaining)))

            # All ranks should mark the pixels for this batch as completed. 'last_pixel' attribute will be updated later
            # Setting each section to 1 independently
            for curr_slice in integers_to_slices(self.__pixels_in_batch):
                self._h5_status_dset[curr_slice] = 1

            self._read_data_chunk()

        if self.verbose:
            print('Rank {} - Finished computing all jobs!'.format(
                self.mpi_rank))

        if self.mpi_comm is not None:
            self.mpi_comm.barrier()

        if self.mpi_rank == 0:
            print('Finished processing the entire dataset!')

        # Update the legacy 'last_pixel' attribute here:
        if self.mpi_rank == 0:
            self.h5_results_grp.attrs['last_pixel'] = self.h5_main.shape[0]

        return self.h5_results_grp
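The nested SimpleFIFO class above is just a bounded queue that keeps a moving average of the most recent per-pixel compute and write times. A quick usage illustration, assuming SimpleFIFO were lifted to module scope so it could be instantiated directly:

fifo = SimpleFIFO(length=3)
for value in [1.0, 2.0, 3.0, 4.0]:
    fifo.put(value)
print(fifo.get_mean())    # 3.0 -> mean of the 3 most recent values (2.0, 3.0, 4.0)
print(fifo.get_cycles())  # 4 -> total number of values ever added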
Example #6
    def __set_memory(self, man_mem_limit=None, mem_multiplier=1.0):
        """
        Checks memory capabilities of each node and sets the recommended data
        chunk sizes to be used by analysis methods.
        This function can work with clusters with heterogeneous memory sizes
        (e.g. CADES SHPC Condo).

        Parameters
        ----------
        man_mem_limit : uint, optional, Default = None (all available memory)
            The amount of memory (in MB) to use for the computation
        mem_multiplier : float, optional. Default = 1
            mem_multiplier is the number that will be multiplied with the
            (byte) size of a single position in the source dataset in order to
            better estimate the number of positions that can be processed at
            any given time (how many pixels of the source and results datasets
            can be retained in memory). The default value of 1.0 only accounts
            for the source dataset. A value greater than 1 would account for
            the size of results datasets as well. For example, if the result
            dataset is the same size and precision as the source dataset,
            the multiplier will be 2 (1 for source, 1 for result)
        """
        if not isinstance(mem_multiplier, float):
            raise TypeError('mem_multiplier must be a floating point number')
        mem_multiplier = abs(mem_multiplier)
        if mem_multiplier < 1:
            raise ValueError('mem_multiplier must be at least 1')

        avail_mem_bytes = get_available_memory()  # in bytes
        if self.verbose and self.mpi_rank == self.__socket_master_rank:
            # expected to be the same for all ranks so just use this.
            print('Rank {} - on socket with {} cores and {} avail. RAM shared '
                  'by {} ranks each given {} cores'
                  '.'.format(self.__socket_master_rank, psutil.cpu_count(),
                             format_size(avail_mem_bytes),
                             self.__ranks_on_socket, self._cores))

        if man_mem_limit is None:
            man_mem_limit = avail_mem_bytes
        else:
            if not isinstance(man_mem_limit, int):
                raise TypeError('man_mem_limit must be a whole number')
            # Note that man_mem_limit is specified in megabytes
            man_mem_limit = abs(man_mem_limit) * 1024**2  # in bytes
            if self.verbose and self.mpi_rank == 0:
                print('User has requested to use no more than {} of memory'
                      '.'.format(format_size(man_mem_limit)))

        max_mem_bytes = min(avail_mem_bytes, man_mem_limit)

        # Remember that multiple processes (either via MPI or joblib) will share this socket.
        # Splitting the memory budget evenly across them is conservative: in practice plenty
        # of free memory remains while each worker is only allowed a small slice of it.
        max_mem_per_worker = max_mem_bytes / (self._cores *
                                              self.__ranks_on_socket)
        if self.verbose and self.mpi_rank == self.__socket_master_rank:
            print('Rank {}: Each of the {} workers on this socket are allowed '
                  'to use {} of RAM'
                  '.'.format(self.mpi_rank,
                             self._cores * self.__ranks_on_socket,
                             format_size(max_mem_per_worker)))

        # Now calculate the number of positions OF RAW DATA ONLY that can be
        # stored in memory in one go PER worker
        self.__bytes_per_pos = \
            self.h5_main.dtype.itemsize * self.h5_main.shape[1]
        if self.verbose and self.mpi_rank == 0:
            print('Each position in the SOURCE dataset is {} large'
                  '.'.format(format_size(self.__bytes_per_pos)))
        # Now multiply this with a factor that takes into account the expected
        # sizes of the results (Final and intermediate) datasets.
        self.__bytes_per_pos *= mem_multiplier
        if self.verbose and self.mpi_rank == 0 and mem_multiplier > 1:
            print('Each position of the source and results dataset(s) is {} '
                  'large.'.format(format_size(self.__bytes_per_pos)))

        self._max_pos_per_read = int(
            np.floor(max_mem_per_worker / self.__bytes_per_pos))

        if self.verbose and self.mpi_rank == self.__socket_master_rank:
            title = 'SOURCE dataset only'
            if mem_multiplier > 1:
                title = 'source and result(s) datasets'
            # expected to be the same for all ranks so just use this.
            print('Rank {}: Workers on this socket allowed to read {} '
                  'positions of the {} per chunk'
                  '.'.format(self.mpi_rank, self._max_pos_per_read, title))
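The chunk-size arithmetic in __set_memory reduces to a couple of divisions. With hypothetical numbers (all values below are made up for illustration):

avail_mem_bytes = 16 * 1024 ** 3     # 16 GB free on the socket (hypothetical)
workers = 4                          # self._cores * self.__ranks_on_socket
bytes_per_pos = 1024 ** 2            # 1 MB per position of the source dataset
mem_multiplier = 2.0                 # results expected to be as large as the source

max_mem_per_worker = avail_mem_bytes / workers                        # 4 GB per worker
max_pos_per_read = int(max_mem_per_worker // (bytes_per_pos * mem_multiplier))
print(max_pos_per_read)              # 2048 positions per read, per worker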
Example #7
def rebuild_svd(h5_main, components=None, cores=None, max_RAM_mb=1024):
    """
    Rebuild the Image from the SVD results on the windows
    Optionally, use only the subset of components specified by ``components``.

    :param h5_main: dataset which SVD was performed on
    :type h5_main: hdf5 Dataset
    
    :param components: 
        Defines which components to keep
        Default - None, all components kept

        Input Types
        integer : Components less than the input will be kept
        length 2 iterable of integers : Integers define start and stop of component slice to retain
        other iterable of integers or slice : Selection of component indices to retain
    :type components: {int, iterable of int, slice} optional

    :param cores: How many cores should be used to rebuild
        Default - None, all but 2 cores will be used, min 1
    :type cores: int, optional
    
    :param max_RAM_mb: Maximum amount of memory to use when rebuilding, in MB.
        Default - 1024 MB
    :type max_RAM_mb: int, optional
    
    :raise: KeyError if SVD results not found 

    :returns: rebuilt dataset
    :rtype: HDF5 Dataset

    """

    if not isinstance(h5_main, USIDataset):
        h5_main = USIDataset(h5_main)

    comp_slice, num_comps = get_component_slice(
        components, total_components=h5_main.shape[1])
    if isinstance(comp_slice, np.ndarray):
        comp_slice = list(comp_slice)
    dset_name = h5_main.name.split('/')[-1]

    # Ensure at least one core is used while leaving two cores free for other tasks
    max_cores = max(1, cpu_count() - 2)
    if cores is not None:
        cores = min(round(abs(cores)), max_cores)
    else:
        cores = max_cores

    max_memory = min(max_RAM_mb * 1024**2, 0.75 * get_available_memory())
    if cores != 1:
        max_memory = int(max_memory / 2)
    '''
    Get the handles for the SVD results
    '''
    try:
        h5_svd_group = find_results_groups(h5_main, 'SVD')[-1]

        h5_S = h5_svd_group['S']
        h5_U = h5_svd_group['U']
        h5_V = h5_svd_group['V']

    except KeyError:
        raise KeyError(
            'SVD Results for {dset} were not found.'.format(dset=dset_name))
    except:
        raise

    func, is_complex, is_compound, n_features, type_mult = check_dtype(h5_V)
    '''
    Calculate the size of a single batch that will fit in the available memory
    '''
    n_comps = h5_S[comp_slice].size
    mem_per_pix = (h5_U.dtype.itemsize +
                   h5_V.dtype.itemsize * h5_V.shape[1]) * n_comps
    fixed_mem = h5_main.size * h5_main.dtype.itemsize

    if cores is None:
        free_mem = max_memory - fixed_mem
    else:
        free_mem = max_memory * 2 - fixed_mem

    batch_size = int(round(float(free_mem) / mem_per_pix))

    if batch_size < 1:
        # Not enough free memory for even a single position; fall back to a small default
        print('Calculated batch size was {}. Defaulting to 100.'.format(batch_size))
        batch_size = 100

    batch_slices = gen_batches(h5_U.shape[0], batch_size)

    print('Reconstructing in batches of {} positions.'.format(batch_size))
    print('Batches should be {} MB each.'.format(mem_per_pix * batch_size /
                                                 1024.0**2))
    '''
    Loop over all batches.
    '''
    ds_V = np.dot(np.diag(h5_S[comp_slice]), func(h5_V[comp_slice, :]))
    rebuild = np.zeros((h5_main.shape[0], ds_V.shape[1]))
    for ibatch, batch in enumerate(batch_slices):
        rebuild[batch, :] += np.dot(h5_U[batch, comp_slice], ds_V)

    rebuild = stack_real_to_target_dtype(rebuild, h5_V.dtype)

    print(
        'Completed reconstruction of data from SVD results.  Writing to file.')
    '''
    Create the Group and dataset to hold the rebuild data
    '''
    rebuilt_grp = create_indexed_group(h5_svd_group, 'Rebuilt_Data')
    h5_rebuilt = write_main_dataset(rebuilt_grp,
                                    rebuild,
                                    'Rebuilt_Data',
                                    get_attr(h5_main, 'quantity'),
                                    get_attr(h5_main, 'units'),
                                    None,
                                    None,
                                    h5_pos_inds=h5_main.h5_pos_inds,
                                    h5_pos_vals=h5_main.h5_pos_vals,
                                    h5_spec_inds=h5_main.h5_spec_inds,
                                    h5_spec_vals=h5_main.h5_spec_vals,
                                    chunks=h5_main.chunks,
                                    compression=h5_main.compression)

    if isinstance(comp_slice, slice):
        rebuilt_grp.attrs['components_used'] = '{}-{}'.format(
            comp_slice.start, comp_slice.stop)
    else:
        rebuilt_grp.attrs['components_used'] = components

    copy_attributes(h5_main, h5_rebuilt, skip_refs=False)

    h5_main.file.flush()

    print('Done writing reconstructed data to file.')

    return h5_rebuilt
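A typical call to rebuild_svd keeps only the leading components; the parameter values below are hypothetical and only illustrate the signature documented above.

# Reconstruct the dataset from its first 24 SVD components,
# limiting the reconstruction to roughly 2 GB of RAM on 2 cores
h5_rebuilt = rebuild_svd(h5_main, components=24, cores=2, max_RAM_mb=2048)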
Example #8
########################################################################################################################
# **Case 4**: The same number of a few independent computations but each of these computations are expected to be
# lengthy. In this case, the overhead of configuring the CPU core for parallel computing is worth the benefit of
# parallel computation. Hence, the function will allow the use of the 3 cores even though the number of computations is
# small.
recommended_cores = comp_utils.recommend_cpu_cores(num_jobs, requested_cores=requested_cores, lengthy_computation=True)
print('Recommended number of CPU cores for {} independent, SLOW, and parallel '
      'computations using the requested {} CPU cores is {}'.format(num_jobs, requested_cores, recommended_cores))

########################################################################################################################
# get_available_memory()
# ----------------------
# Among the many best practices we follow when developing a new data analysis or processing class is memory-safe
# computation. This handy function helps us quickly determine the available memory. Note that it returns the
# available memory in bytes, so we convert it to gigabytes here:
print('Available memory in this machine: {} GB'.format(comp_utils.get_available_memory() / 1024 ** 3))

########################################################################################################################
# The following functions are harder to demonstrate in this document since they are meant for a cluster of computers
# rather than a conventional personal computer.
#
# get_MPI()
# ---------
# This function is useful for getting a handle to the Message Passing Interface (MPI) communicator if one is available.
# If the ``mpi4py`` package is not installed or if ``mpi4py`` is being run over a single CPU, this function returns
# ``None`` instead. Given that this documentation was generated on a single small virtual machine without invoking
# ``mpirun`` or ``mpiexec``, the following call to ``get_MPI()`` should return ``None``
print(comp_utils.get_MPI())

# group_ranks_by_socket()
# -----------------------