from __future__ import division, print_function, absolute_import

import time
import numpy as np
from sklearn.utils.extmath import randomized_svd

# NOTE: these helper functions have moved between pyUSID / sidpy releases;
# the import paths below match recent releases and may need adjusting for
# older installations.
from sidpy.hdf.hdf_utils import get_attr, write_simple_attrs
from sidpy.hdf.dtype_utils import check_dtype, stack_real_to_target_dtype
from sidpy.hdf.reg_ref import get_indices_for_region_ref, create_region_reference
from sidpy.base.string_utils import format_time
from sidpy.proc.comp_utils import get_available_memory

from pyUSID import Dimension, USIDataset
from pyUSID.processing.process import Process
from pyUSID.io.hdf_utils import reshape_to_n_dims, create_results_group, write_main_dataset
from pyUSID.io.write_utils import calc_chunks


class SVD(Process):
    """
    This class provides a file-wrapper around the
    :meth:`sklearn.utils.extmath.randomized_svd` function. In other words, it
    extracts and then reformats the data present in the provided
    :class:`pyUSID.USIDataset` object, performs the randomized SVD operation,
    and writes the results back to the USID HDF5 file after formatting the
    results in a USID-compliant manner.
    """

    def __init__(self, h5_main, num_components=None, **kwargs):
        """
        Perform the SVD decomposition on the selected dataset and write the
        results to the HDF5 file.

        :param h5_main: USID Main HDF5 dataset that will be decomposed
        :type h5_main: :class:`pyUSID.USIDataset` object
        :param num_components: Number of components to decompose h5_main into.
            Default None.
        :type num_components: int, optional
        :param h5_target_group: Location where to look for existing results
            and to place newly computed results. Use this kwarg if the results
            need to be written to a different HDF5 file. By default, this
            value is set to the parent group containing ``h5_main``.
        :type h5_target_group: :class:`h5py.Group`, optional. Default = None
        :param kwargs: Arguments to be sent to Process
        """
        super(SVD, self).__init__(h5_main, 'SVD', **kwargs)

        '''
        Calculate the size of the main data in memory and compare to max_mem.
        We use the minimum of the actual dtype's itemsize and float32 since we
        don't want to read it in yet and do the proper type conversions.
        '''
        n_samples, n_features = h5_main.shape
        self.data_transform_func, is_complex, is_compound, n_features, type_mult = check_dtype(h5_main)

        if num_components is None:
            num_components = min(n_samples, n_features)
        else:
            num_components = min(n_samples, n_features, num_components)

        self.num_components = num_components

        # Check that we can actually compute the SVD with the selected number
        # of components
        self._check_available_mem()

        self.parms_dict = {'num_components': num_components}
        self.duplicate_h5_groups, self.partial_h5_groups = self._check_for_duplicates()

        # supercharge h5_main!
        self.h5_main = USIDataset(self.h5_main)

        self.__u = None
        self.__v = None
        self.__s = None

    def test(self, override=False):
        """
        Applies randomized SVD to the dataset. This function does NOT write
        results to the HDF5 file. Call :meth:`compute` to write to the file.
        Handles complex and compound datasets such that the V matrix is of the
        same data-type as the input matrix.

        :param override: Set to True to recompute results even if prior
            results are available. Else, returns existing results.
        :type override: bool, optional. Default = False
        :returns: tuple (u_mat, s, v_mat) where u_mat is the abundance matrix,
            s is the variance vector, and v_mat is the eigenvector matrix
        :rtype: tuple of :class:`numpy.ndarray`
        """
        '''
        Check if a number of components has been set and ensure that the
        number is less than the minimum axis length of the data. If both
        conditions are met, use fsvd. If not, use the regular svd.

        C.Smith -- We might need to put a lower limit on num_comps in the
        future. I don't know enough about svd to be sure.
        '''
        if not override:
            if isinstance(self.duplicate_h5_groups, list) and len(self.duplicate_h5_groups) > 0:
                self.h5_results_grp = self.duplicate_h5_groups[-1]
                print('Returning previously computed results from: {}'.format(self.h5_results_grp.name))
                print('set the "override" flag to True to recompute results')
                return reshape_to_n_dims(self.h5_results_grp['U'])[0], self.h5_results_grp['S'][()], \
                    reshape_to_n_dims(self.h5_results_grp['V'])[0]

        self.h5_results_grp = None

        t1 = time.time()

        self.__u, self.__s, self.__v = randomized_svd(self.data_transform_func(self.h5_main),
                                                      self.num_components, n_iter=3)
        self.__v = stack_real_to_target_dtype(self.__v, self.h5_main.dtype)

        print('Took {} to compute randomized SVD'.format(format_time(time.time() - t1)))

        u_mat, success = reshape_to_n_dims(self.__u, h5_pos=self.h5_main.h5_pos_inds,
                                           h5_spec=np.expand_dims(np.arange(self.__u.shape[1]), axis=0))
        if not success:
            raise ValueError('Could not reshape U to an N-dimensional dataset! Error: ' + str(success))

        # When the source dataset has a singular-valued spectroscopic
        # dimension, stack_real_to_target causes V to lose all its dimensions
        if self.__v.ndim == 0:
            # However, we want V to be 2D:
            self.__v = np.atleast_2d(self.__v)

        v_mat, success = reshape_to_n_dims(self.__v,
                                           h5_pos=np.expand_dims(np.arange(self.__u.shape[1]), axis=1),
                                           h5_spec=self.h5_main.h5_spec_inds)
        if not success:
            raise ValueError('Could not reshape V to an N-dimensional dataset! Error: ' + str(success))

        return u_mat, self.__s, v_mat

    def compute(self, override=False):
        """
        Computes SVD (by calling :meth:`test` if it has not already been
        called) and writes the results to file. Consider calling :meth:`test`
        to check the results before writing to file. Results are deleted from
        memory upon writing to the HDF5 file.

        :param override: Set to True to recompute results even if prior
            results are available. Else, returns existing results.
        :type override: bool, optional. Default = False
        :returns: HDF5 Group containing all the results
        :rtype: :class:`h5py.Group` object
        """
        if self.__u is None and self.__v is None and self.__s is None:
            self.test(override=override)

        if self.h5_results_grp is None:
            self._write_results_chunk()
            self.delete_results()

        h5_group = self.h5_results_grp

        return h5_group

    def delete_results(self):
        """
        Deletes results from memory.
        """
        del self.__u, self.__s, self.__v
        self.__u = None
        self.__v = None
        self.__s = None

    def _write_results_chunk(self):
        """
        Writes the SVD results to file.
        """
        comp_dim = Dimension('Principal Component', 'a. u.', len(self.__s))

        h5_svd_group = create_results_group(self.h5_main, self.process_name,
                                            h5_parent_group=self._h5_target_group)
        self.h5_results_grp = h5_svd_group
        self._write_source_dset_provenance()

        write_simple_attrs(h5_svd_group, self.parms_dict)
        write_simple_attrs(h5_svd_group, {'svd_method': 'sklearn-randomized'})

        h5_u = write_main_dataset(h5_svd_group, np.float32(self.__u), 'U',
                                  'Abundance', 'a.u.', None, comp_dim,
                                  h5_pos_inds=self.h5_main.h5_pos_inds,
                                  h5_pos_vals=self.h5_main.h5_pos_vals,
                                  dtype=np.float32,
                                  chunks=calc_chunks(self.__u.shape, np.float32(0).itemsize))

        h5_v = write_main_dataset(h5_svd_group, self.__v, 'V',
                                  get_attr(self.h5_main, 'quantity')[0],
                                  'a.u.', comp_dim, None,
                                  h5_spec_inds=self.h5_main.h5_spec_inds,
                                  h5_spec_vals=self.h5_main.h5_spec_vals,
                                  chunks=calc_chunks(self.__v.shape, self.h5_main.dtype.itemsize))

        # No point making this 1D dataset a main dataset
        h5_s = h5_svd_group.create_dataset('S', data=np.float32(self.__s))

        '''
        Check h5_main for plot group references.
        Copy them into V if they exist.
        '''
        for key in self.h5_main.attrs.keys():
            if '_Plot_Group' not in key:
                continue

            ref_inds = get_indices_for_region_ref(self.h5_main, self.h5_main.attrs[key],
                                                  return_method='corners')
            ref_inds = ref_inds.reshape([-1, 2, 2])
            ref_inds[:, 1, 0] = h5_v.shape[0] - 1

            svd_ref = create_region_reference(h5_v, ref_inds)

            h5_v.attrs[key] = svd_ref

        # Marking completion:
        self._status_dset_name = 'completed_positions'
        self._h5_status_dset = h5_svd_group.create_dataset(self._status_dset_name,
                                                           data=np.ones(self.h5_main.shape[0], dtype=np.uint8))
        # keeping legacy option:
        h5_svd_group.attrs['last_pixel'] = self.h5_main.shape[0]

    def _check_available_mem(self):
        """
        Check that there is enough memory to perform the SVD decomposition.

        :raises MemoryError: if there is not enough memory to load the dataset
            or to compute the SVD with the requested number of components
        """
        if self.verbose:
            print('Checking memory availability.')
        n_samples, n_features = self.h5_main.shape

        s_mem_per_comp = np.float32(0).itemsize
        u_mem_per_comp = np.float32(0).itemsize * n_samples
        v_mem_per_comp = self.h5_main.dtype.itemsize * n_features

        mem_per_comp = s_mem_per_comp + u_mem_per_comp + v_mem_per_comp
        max_mem = get_available_memory()
        avail_mem = 0.75 * max_mem
        free_mem = avail_mem - self.h5_main.__sizeof__()

        if free_mem <= 0:
            error_message = 'Cannot load main dataset into memory.\n' + \
                            'Available memory is {}. Dataset needs {}.'.format(avail_mem,
                                                                               self.h5_main.__sizeof__())
            raise MemoryError(error_message)

        if self.verbose:
            print('Memory available for SVD is {}.'.format(free_mem))
            print('Memory needed per component is {}.'.format(mem_per_comp))

        cant_svd = (free_mem - self.num_components * mem_per_comp) <= 0
        if cant_svd:
            max_comps = int(np.floor(free_mem / mem_per_comp))
            error_message = 'Not enough free memory to perform SVD with the requested number of components.\n' + \
                            'The maximum possible number of components is {}.'.format(max_comps)
            raise MemoryError(error_message)
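
# ----------------------------------------------------------------------------
# Rank-k reconstruction sketch (illustrative only, not part of the class):
# randomized_svd factors the flattened data matrix X (positions x spectral
# points) as X ~= U @ diag(S) @ V, so the first k components give a denoised,
# low-rank approximation of the source data. This helper is hypothetical, and
# the simple matrix product assumes a real-valued source dataset; complex or
# compound data is type-stacked inside the stored V and would need to be
# unstacked first.
# ----------------------------------------------------------------------------
def low_rank_approximation(h5_svd_grp, num_comps):
    """Rebuild an approximation of the flattened source matrix from stored
    SVD results."""
    u = h5_svd_grp['U'][()]   # (n_positions, n_components)
    s = h5_svd_grp['S'][()]   # (n_components,)
    v = h5_svd_grp['V'][()]   # (n_components, n_spectral_points)
    # Scale each retained column of U by its singular value, then project
    # back through the corresponding rows of V
    return (u[:, :num_comps] * s[:num_comps]) @ v[:num_comps, :]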
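
# ----------------------------------------------------------------------------
# Usage sketch (illustrative only): the file name and internal dataset path
# below are hypothetical placeholders -- point them at a real USID HDF5 file.
# Only methods defined on the SVD class above are used.
# ----------------------------------------------------------------------------
if __name__ == '__main__':
    import h5py

    with h5py.File('my_usid_file.h5', mode='r+') as h5_file:
        # Hypothetical location of a USID Main dataset within the file
        h5_main = USIDataset(h5_file['Measurement_000/Channel_000/Raw_Data'])

        svd_proc = SVD(h5_main, num_components=32)

        # Inspect the decomposition in memory before committing it to file
        u_mat, s_vec, v_mat = svd_proc.test()

        # Write U, S, and V back to the HDF5 file in USID-compliant form
        h5_svd_grp = svd_proc.compute()
        print('SVD results written to {}'.format(h5_svd_grp.name))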