Ejemplo n.º 1
0
def rebuild_svd(h5_main, components=None, cores=None, max_RAM_mb=1024):
    """
    Rebuild the Image from the SVD results on the windows
    Optionally, only use components less than n_comp.

    Parameters
    ----------
    h5_main : hdf5 Dataset
        dataset which SVD was performed on
    components : {int, iterable of int, slice} optional
        Defines which components to keep
        Default - None, all components kept

        Input Types
        integer : Components less than the input will be kept
        length 2 iterable of integers : Integers define start and stop of component slice to retain
        other iterable of integers or slice : Selection of component indices to retain
    cores : int, optional
        How many cores should be used to rebuild
        Default - None, all but 2 cores will be used, min 1
    max_RAM_mb : int, optional
        Maximum ammount of memory to use when rebuilding, in Mb.
        Default - 1024Mb

    Returns
    -------
    rebuilt_data : HDF5 Dataset
        the rebuilt dataset

    """
    comp_slice, num_comps = get_component_slice(
        components, total_components=h5_main.shape[1])
    if isinstance(comp_slice, np.ndarray):
        comp_slice = list(comp_slice)
    dset_name = h5_main.name.split('/')[-1]

    # Ensuring that at least one core is available for use / 2 cores are available for other use
    max_cores = max(1, cpu_count() - 2)
    #         print('max_cores',max_cores)
    if cores is not None:
        cores = min(round(abs(cores)), max_cores)
    else:
        cores = max_cores

    max_memory = min(max_RAM_mb * 1024**2, 0.75 * get_available_memory())
    if cores != 1:
        max_memory = int(max_memory / 2)
    '''
    Get the handles for the SVD results
    '''
    try:
        h5_svd_group = find_results_groups(h5_main, 'SVD')[-1]

        h5_S = h5_svd_group['S']
        h5_U = h5_svd_group['U']
        h5_V = h5_svd_group['V']

    except KeyError:
        raise KeyError(
            'SVD Results for {dset} were not found.'.format(dset=dset_name))
    except:
        raise

    func, is_complex, is_compound, n_features, type_mult = check_dtype(h5_V)
    '''
    Calculate the size of a single batch that will fit in the available memory
    '''
    n_comps = h5_S[comp_slice].size
    mem_per_pix = (h5_U.dtype.itemsize +
                   h5_V.dtype.itemsize * h5_V.shape[1]) * n_comps
    fixed_mem = h5_main.size * h5_main.dtype.itemsize

    if cores is None:
        free_mem = max_memory - fixed_mem
    else:
        free_mem = max_memory * 2 - fixed_mem

    batch_size = int(round(float(free_mem) / mem_per_pix))
    batch_slices = gen_batches(h5_U.shape[0], batch_size)

    print('Reconstructing in batches of {} positions.'.format(batch_size))
    print('Batchs should be {} Mb each.'.format(mem_per_pix * batch_size /
                                                1024.0**2))
    '''
    Loop over all batches.
    '''
    ds_V = np.dot(np.diag(h5_S[comp_slice]), func(h5_V[comp_slice, :]))
    rebuild = np.zeros((h5_main.shape[0], ds_V.shape[1]))
    for ibatch, batch in enumerate(batch_slices):
        rebuild[batch, :] += np.dot(h5_U[batch, comp_slice], ds_V)

    rebuild = stack_real_to_target_dtype(rebuild, h5_V.dtype)

    print(
        'Completed reconstruction of data from SVD results.  Writing to file.')
    '''
    Create the Group and dataset to hold the rebuild data
    '''
    rebuilt_grp = create_indexed_group(h5_svd_group, 'Rebuilt_Data')
    h5_rebuilt = write_main_dataset(rebuilt_grp,
                                    rebuild,
                                    'Rebuilt_Data',
                                    get_attr(h5_main, 'quantity'),
                                    get_attr(h5_main, 'units'),
                                    None,
                                    None,
                                    h5_pos_inds=h5_main.h5_pos_inds,
                                    h5_pos_vals=h5_main.h5_pos_vals,
                                    h5_spec_inds=h5_main.h5_spec_inds,
                                    h5_spec_vals=h5_main.h5_spec_vals,
                                    chunks=h5_main.chunks,
                                    compression=h5_main.compression)

    if isinstance(comp_slice, slice):
        rebuilt_grp.attrs['components_used'] = '{}-{}'.format(
            comp_slice.start, comp_slice.stop)
    else:
        rebuilt_grp.attrs['components_used'] = components

    copy_attributes(h5_main, h5_rebuilt, skip_refs=False)

    h5_main.file.flush()

    print('Done writing reconstructed data to file.')

    return h5_rebuilt
Ejemplo n.º 2
0
    def translate(self, h5_path, force_patch=False, **kwargs):
        """
        Add the needed references and attributes to the h5 file that are not created by the
        LabView data aquisition program.

        Parameters
        ----------
        h5_path : str
            path to the h5 file
        force_patch : bool, optional
            Should the check to see if the file has already been patched be ignored.
            Default False.

        Returns
        -------
        h5_file : h5py.File
            patched hdf5 file

        """
        # Open the file and check if a patch is needed
        h5_file = h5py.File(os.path.abspath(h5_path), 'r+')
        if h5_file.attrs.get('translator') is not None and not force_patch:
            print('File is already Pycroscopy ready.')
            return h5_file
        '''
        Get the list of all Raw_Data Datasets
        Loop over the list and update the needed attributes
        '''
        raw_list = find_dataset(h5_file, 'Raw_Data')
        for h5_raw in raw_list:
            if 'quantity' not in h5_raw.attrs:
                h5_raw.attrs['quantity'] = 'quantity'
            if 'units' not in h5_raw.attrs:
                h5_raw.attrs['units'] = 'a.u.'

            # Grab the channel and measurement group of the data to check some needed attributes
            h5_chan = h5_raw.parent
            try:
                c_type = get_attr(h5_chan, 'channel_type')

            except KeyError:
                warn_str = "'channel_type' was not found as an attribute of {}.\n".format(
                    h5_chan.name)
                warn_str += "If this is BEPS or BELine data from the LabView aquisition software, " + \
                            "please run the following piece of code.  Afterwards, run this function again.\n" + \
                            "CODE: " \
                            "hdf.file['{}'].attrs['channel_type'] = 'BE'".format(h5_chan.name)
                warn(warn_str)
                return h5_file

            except:
                raise

            if c_type != 'BE':
                continue

            h5_meas = h5_chan.parent
            h5_meas.attrs['num_UDVS_steps'] = h5_meas.attrs['num_steps']

            # Get the object handles for the Indices and Values datasets
            h5_pos_inds = h5_chan['Position_Indices']
            h5_pos_vals = h5_chan['Position_Values']
            h5_spec_inds = h5_chan['Spectroscopic_Indices']
            h5_spec_vals = h5_chan['Spectroscopic_Values']

            # Make sure we have correct spectroscopic indices for the given values
            ds_spec_inds = create_spec_inds_from_vals(h5_spec_vals[()])
            if not np.allclose(ds_spec_inds, h5_spec_inds[()]):
                h5_spec_inds[:, :] = ds_spec_inds[:, :]
                h5_file.flush()

            # Get the labels and units for the Spectroscopic datasets
            h5_spec_labels = h5_spec_inds.attrs['labels']
            inds_and_vals = [
                h5_pos_inds, h5_pos_vals, h5_spec_inds, h5_spec_vals
            ]
            for dset in inds_and_vals:
                spec_labels = dset.attrs['labels']
                try:
                    spec_units = dset.attrs['units']

                    if len(spec_units) != len(spec_labels):
                        raise KeyError

                except KeyError:
                    dset['units'] = ['' for _ in spec_labels]
                except:
                    raise

            for ilabel, label in enumerate(h5_spec_labels):
                label_slice = (slice(ilabel, ilabel + 1), slice(None))
                if label == '':
                    label = 'Step'
                h5_spec_inds.attrs[label] = h5_spec_inds.regionref[label_slice]
                h5_spec_vals.attrs[label] = h5_spec_vals.regionref[label_slice]

            # Link the references to the Indices and Values datasets to the Raw_Data
            link_as_main(h5_raw, h5_pos_inds, h5_pos_vals, h5_spec_inds,
                         h5_spec_vals)

            # Also link the Bin_Frequencies and Bin_Wfm_Type datasets
            h5_freqs = h5_chan['Bin_Frequencies']
            aux_dset_names = ['Bin_Frequencies']
            aux_dset_refs = [h5_freqs.ref]
            check_and_link_ancillary(h5_raw,
                                     aux_dset_names,
                                     anc_refs=aux_dset_refs)
            '''
            Get all SHO_Fit groups for the Raw_Data and loop over them
            Get the Guess and Spectroscopic Datasets for each SHO_Fit group
            '''
            sho_list = find_results_groups(h5_raw, 'SHO_Fit')
            for h5_sho in sho_list:
                h5_sho_guess = h5_sho['Guess']
                h5_sho_spec_inds = h5_sho['Spectroscopic_Indices']
                h5_sho_spec_vals = h5_sho['Spectroscopic_Values']

                # Make sure we have correct spectroscopic indices for the given values
                ds_sho_spec_inds = create_spec_inds_from_vals(
                    h5_sho_spec_inds[()])
                if not np.allclose(ds_sho_spec_inds, h5_sho_spec_inds[()]):
                    h5_sho_spec_inds[:, :] = ds_sho_spec_inds[:, :]

                # Get the labels and units for the Spectroscopic datasets
                h5_sho_spec_labels = get_attr(h5_sho_spec_inds, 'labels')
                link_as_main(h5_sho_guess, h5_pos_inds, h5_pos_vals,
                             h5_sho_spec_inds, h5_sho_spec_vals)
                sho_inds_and_vals = [h5_sho_spec_inds, h5_sho_spec_vals]

                for dset in sho_inds_and_vals:
                    spec_labels = get_attr(dset, 'labels')
                    try:
                        spec_units = get_attr(dset, 'units')

                        if len(spec_units) != len(spec_labels):
                            raise KeyError

                    except KeyError:
                        spec_units = [''.encode('utf-8') for _ in spec_labels]
                        dset.attrs['units'] = spec_units

                    except:
                        raise

                # Make region references in the
                for ilabel, label in enumerate(h5_sho_spec_labels):
                    label_slice = (slice(ilabel, ilabel + 1), slice(None))
                    if label == '':
                        label = 'Step'.encode('utf-8')
                    h5_sho_spec_inds.attrs[label] = h5_sho_spec_inds.regionref[
                        label_slice]
                    h5_sho_spec_vals.attrs[label] = h5_sho_spec_vals.regionref[
                        label_slice]

            h5_file.flush()

        h5_file.attrs['translator'] = 'V3patcher'.encode('utf-8')

        return h5_file
Ejemplo n.º 3
0
def plot_svd(h5_main, savefig=False, num_plots=16, **kwargs):
    '''
    Replots the SVD showing the skree, abundance maps, and eigenvectors.
    If h5_main is a Dataset, it will default to the most recent SVD group from that
    Dataset.
    If h5_main is the results group, then it will plot the values for that group.
    
    :param h5_main:
    :type h5_main: USIDataset or h5py Dataset or h5py Group
    
    :param savefig: Saves the figures to disk with some default names
    :type savefig: bool, optional
        
    :param num_plots: Default number of eigenvectors and abundance plots to show
    :type num_plots: int
        
    :param kwargs: keyword arguments for svd filtering
    :type kwarrgs: dict, optional
        
    '''

    if isinstance(h5_main, h5py.Group):

        _U = find_dataset(h5_main, 'U')[-1]
        _V = find_dataset(h5_main, 'V')[-1]
        units = 'arbitrary (a.u.)'
        h5_spec_vals = np.arange(_V.shape[1])
        h5_svd_group = _U.parent

    else:

        h5_svd_group = find_results_groups(h5_main, 'SVD')[-1]
        units = h5_main.attrs['quantity']
        h5_spec_vals = h5_main.get_spec_values('Time')

    h5_U = h5_svd_group['U']
    h5_V = h5_svd_group['V']
    h5_S = h5_svd_group['S']

    _U = USIDataset(h5_U)
    [num_rows, num_cols] = _U.pos_dim_sizes

    abun_maps = np.reshape(h5_U[:, :16], (num_rows, num_cols, -1))
    eigen_vecs = h5_V[:16, :]

    skree_sum = np.zeros(h5_S.shape)
    for i in range(h5_S.shape[0]):
        skree_sum[i] = np.sum(h5_S[:i]) / np.sum(h5_S)

    plt.figure()
    plt.plot(skree_sum, 'bo')
    plt.title('Cumulative Variance')
    plt.xlabel('Total Components')
    plt.ylabel('Total variance ratio (a.u.)')

    if savefig:
        plt.savefig('Cumulative_variance_plot.png')

    fig_skree, axes = plot_utils.plot_scree(h5_S, title='Scree plot')
    fig_skree.tight_layout()

    if savefig:
        plt.savefig('Scree_plot.png')

    fig_abun, axes = plot_utils.plot_map_stack(abun_maps,
                                               num_comps=num_plots,
                                               title='SVD Abundance Maps',
                                               color_bar_mode='single',
                                               cmap='inferno',
                                               reverse_dims=True,
                                               fig_mult=(3.5, 3.5),
                                               facecolor='white',
                                               **kwargs)
    fig_abun.tight_layout()
    if savefig:
        plt.savefig('Abundance_maps.png')

    fig_eigvec, axes = plot_utils.plot_curves(h5_spec_vals * 1e3,
                                              eigen_vecs,
                                              use_rainbow_plots=False,
                                              x_label='Time (ms)',
                                              y_label=units,
                                              num_plots=num_plots,
                                              subtitle_prefix='Component',
                                              title='SVD Eigenvectors',
                                              evenly_spaced=False,
                                              **kwargs)
    fig_eigvec.tight_layout()
    if savefig:
        plt.savefig('Eigenvectors.png')

    return
Ejemplo n.º 4
0
    def _check_for_old_fit(self):
        """
        Returns three lists of h5py.Dataset objects where the group contained:
            1. Completed guess only
            2. Partial Fit
            3. Completed Fit

        Returns
        -------

        """
        # First find all groups that match the basic condition of matching tool name
        all_groups = find_results_groups(self.h5_main, self._fitter_name)
        if self._verbose:
            print(
                'Groups that matched the nomenclature: {}'.format(all_groups))

        # Next sort these groups into three categories:
        completed_guess = []
        partial_fits = []
        completed_fits = []

        for h5_group in all_groups:

            if 'Fit' in h5_group.keys():
                # check group for fit attribute

                h5_fit = h5_group['Fit']

                # check Fit dataset against parms_dict
                if not check_for_matching_attrs(h5_fit,
                                                new_parms=self._parms_dict,
                                                verbose=self._verbose):
                    if self._verbose:
                        print('{} did not match the given parameters'.format(
                            h5_fit.name))
                    continue

                # sort this dataset:
                try:
                    last_pix = get_attr(h5_fit, 'last_pixel')
                except KeyError:
                    last_pix = None

                # For now skip any fits that are missing 'last_pixel'
                if last_pix is None:
                    continue
                elif last_pix < self.h5_main.shape[0]:
                    partial_fits.append(h5_fit.parent)
                else:
                    completed_fits.append(h5_fit)
            else:
                if 'Guess' in h5_group.keys():
                    h5_guess = h5_group['Guess']

                    # sort this dataset:
                    try:
                        last_pix = get_attr(h5_guess, 'last_pixel')
                    except KeyError:
                        last_pix = None

                    # For now skip any fits that are missing 'last_pixel'
                    if last_pix is None:
                        continue
                    elif last_pix == self.h5_main.shape[0]:
                        if self._verbose:
                            print('{} was a completed Guess'.format(
                                h5_guess.name))
                        completed_guess.append(h5_guess)
                    else:
                        if self._verbose:
                            print(
                                '{} did not not have completed Guesses'.format(
                                    h5_guess.name))
                else:
                    if self._verbose:
                        print(
                            '{} did not even have Guess. Categorizing as defective Group'
                            .format(h5_group.name))

        return completed_guess, partial_fits, completed_fits
Ejemplo n.º 5
0
    def translate(self, h5_path, force_patch=False, **kwargs):
        """
        Add the needed references and attributes to the h5 file that are not created by the
        LabView data aquisition program.

        Parameters
        ----------
        h5_path : str
            path to the h5 file
        force_patch : bool, optional
            Should the check to see if the file has already been patched be ignored.
            Default False.

        Returns
        -------
        h5_file : str
            path to the patched dataset

        """
        # Open the file and check if a patch is needed
        h5_file = h5py.File(os.path.abspath(h5_path), 'r+')
        if h5_file.attrs.get('translator') is not None and not force_patch:
            print('File is already Pycroscopy ready.')
            h5_file.close()
            return h5_path
        '''
        Get the list of all Raw_Data Datasets
        Loop over the list and update the needed attributes
        '''
        raw_list = find_dataset(h5_file, 'Raw_Data')
        for h5_raw in raw_list:
            if 'quantity' not in h5_raw.attrs:
                h5_raw.attrs['quantity'] = 'quantity'
            if 'units' not in h5_raw.attrs:
                h5_raw.attrs['units'] = 'a.u.'

            # Grab the channel and measurement group of the data to check some needed attributes
            h5_chan = h5_raw.parent
            try:
                c_type = get_attr(h5_chan, 'channel_type')

            except KeyError:
                warn_str = "'channel_type' was not found as an attribute of {}.\n".format(
                    h5_chan.name)
                warn_str += "If this is BEPS or BELine data from the LabView aquisition software, " + \
                            "please run the following piece of code.  Afterwards, run this function again.\n" + \
                            "CODE: " \
                            "hdf.file['{}'].attrs['channel_type'] = 'BE'".format(h5_chan.name)
                warn(warn_str)
                h5_file.close()
                return h5_path

            except:
                raise

            if c_type != 'BE':
                continue

            h5_meas = h5_chan.parent
            h5_meas.attrs['num_UDVS_steps'] = h5_meas.attrs['num_steps']

            # Get the object handles for the Indices and Values datasets
            h5_pos_inds = h5_chan['Position_Indices']
            h5_pos_vals = h5_chan['Position_Values']
            h5_spec_inds = h5_chan['Spectroscopic_Indices']
            h5_spec_vals = h5_chan['Spectroscopic_Values']

            # Make sure we have correct spectroscopic indices for the given values
            ds_spec_inds = create_spec_inds_from_vals(h5_spec_vals[()])
            if not np.allclose(ds_spec_inds, h5_spec_inds[()]):
                h5_spec_inds[:, :] = ds_spec_inds[:, :]
                h5_file.flush()

            # Get the labels and units for the Spectroscopic datasets
            h5_spec_labels = h5_spec_inds.attrs['labels']
            inds_and_vals = [
                h5_pos_inds, h5_pos_vals, h5_spec_inds, h5_spec_vals
            ]
            for dset in inds_and_vals:
                spec_labels = dset.attrs['labels']
                try:
                    spec_units = dset.attrs['units']

                    if len(spec_units) != len(spec_labels):
                        raise KeyError

                except KeyError:
                    dset['units'] = ['' for _ in spec_labels]
                except:
                    raise
            """"
            In early versions, too many spectroscopic dimension labels and 
            units were listed compared to the number of rows. Remove here:
            """
            remove_non_exist_spec_dim_labs(h5_spec_inds,
                                           h5_spec_vals,
                                           h5_meas,
                                           verbose=False)
            """
            Add back some standard metadata to be consistent with older
            BE data
            """
            missing_metadata = dict()
            if 'File_file_name' not in h5_meas.attrs.keys():
                missing_metadata['File_file_name'] = os.path.split(
                    h5_raw.file.filename)[-1].replace('.h5', '')
            if 'File_date_and_time' not in h5_meas.attrs.keys():
                try:
                    date_str = get_attr(h5_raw.file, 'date_string')
                    time_str = get_attr(h5_raw.file, 'time_string')
                    full_str = date_str.strip() + ' ' + time_str.strip()
                    """
                    convert:
                        date_string : 2018-12-05
                        time_string : 3:41:45 PM
                    to: 
                        File_date_and_time: 19-Jun-2009 18:44:56
                    """
                    try:
                        dt_obj = datetime.datetime.strptime(
                            full_str, "%Y-%m-%d %I:%M:%S %p")
                        missing_metadata[
                            'File_date_and_time'] = dt_obj.strftime(
                                '%d-%b-%Y %H:%M:%S')
                    except ValueError:
                        pass
                except KeyError:
                    pass
            # Now write to measurement group:
            if len(missing_metadata) > 0:
                write_simple_attrs(h5_meas, missing_metadata)

            # Link the references to the Indices and Values datasets to the Raw_Data
            link_as_main(h5_raw, h5_pos_inds, h5_pos_vals, h5_spec_inds,
                         h5_spec_vals)

            # Also link the Bin_Frequencies and Bin_Wfm_Type datasets
            h5_freqs = h5_chan['Bin_Frequencies']
            aux_dset_names = ['Bin_Frequencies']
            aux_dset_refs = [h5_freqs.ref]
            check_and_link_ancillary(h5_raw,
                                     aux_dset_names,
                                     anc_refs=aux_dset_refs)
            '''
            Get all SHO_Fit groups for the Raw_Data and loop over them
            Get the Guess and Spectroscopic Datasets for each SHO_Fit group
            '''
            sho_list = find_results_groups(h5_raw, 'SHO_Fit')
            for h5_sho in sho_list:
                h5_sho_guess = h5_sho['Guess']
                h5_sho_spec_inds = h5_sho['Spectroscopic_Indices']
                h5_sho_spec_vals = h5_sho['Spectroscopic_Values']

                # Make sure we have correct spectroscopic indices for the given values
                ds_sho_spec_inds = create_spec_inds_from_vals(
                    h5_sho_spec_inds[()])
                if not np.allclose(ds_sho_spec_inds, h5_sho_spec_inds[()]):
                    h5_sho_spec_inds[:, :] = ds_sho_spec_inds[:, :]

                # Get the labels and units for the Spectroscopic datasets
                h5_sho_spec_labels = get_attr(h5_sho_spec_inds, 'labels')
                link_as_main(h5_sho_guess, h5_pos_inds, h5_pos_vals,
                             h5_sho_spec_inds, h5_sho_spec_vals)
                sho_inds_and_vals = [h5_sho_spec_inds, h5_sho_spec_vals]

                for dset in sho_inds_and_vals:
                    spec_labels = get_attr(dset, 'labels')
                    try:
                        spec_units = get_attr(dset, 'units')

                        if len(spec_units) != len(spec_labels):
                            raise KeyError

                    except KeyError:
                        spec_units = [''.encode('utf-8') for _ in spec_labels]
                        dset.attrs['units'] = spec_units

                    except:
                        raise

            h5_file.flush()

        h5_file.attrs['translator'] = 'V3patcher'.encode('utf-8')

        h5_file.close()

        return h5_path
Ejemplo n.º 6
0
    def translate(self, h5_path, force_patch=False, **kwargs):
        """
        Add the needed references and attributes to the h5 file that are not created by the
        LabView data aquisition program.

        Parameters
        ----------
        h5_path : str
            path to the h5 file
        force_patch : bool, optional
            Should the check to see if the file has already been patched be ignored.
            Default False.

        Returns
        -------
        h5_file : h5py.File
            patched hdf5 file

        """
        # Open the file and check if a patch is needed
        h5_file = h5py.File(os.path.abspath(h5_path), 'r+')
        if h5_file.attrs.get('translator') is not None and not force_patch:
            print('File is already Pycroscopy ready.')
            return h5_file

        '''
        Get the list of all Raw_Data Datasets
        Loop over the list and update the needed attributes
        '''
        raw_list = find_dataset(h5_file, 'Raw_Data')
        for h5_raw in raw_list:
            if 'quantity' not in h5_raw.attrs:
                h5_raw.attrs['quantity'] = 'quantity'
            if 'units' not in h5_raw.attrs:
                h5_raw.attrs['units'] = 'a.u.'

            # Grab the channel and measurement group of the data to check some needed attributes
            h5_chan = h5_raw.parent
            try:
                c_type = get_attr(h5_chan, 'channel_type')

            except KeyError:
                warn_str = "'channel_type' was not found as an attribute of {}.\n".format(h5_chan.name)
                warn_str += "If this is BEPS or BELine data from the LabView aquisition software, " + \
                            "please run the following piece of code.  Afterwards, run this function again.\n" + \
                            "CODE: " \
                            "hdf.file['{}'].attrs['channel_type'] = 'BE'".format(h5_chan.name)
                warn(warn_str)
                return h5_file

            except:
                raise

            if c_type != 'BE':
                continue

            h5_meas = h5_chan.parent
            h5_meas.attrs['num_UDVS_steps'] = h5_meas.attrs['num_steps']

            # Get the object handles for the Indices and Values datasets
            h5_pos_inds = h5_chan['Position_Indices']
            h5_pos_vals = h5_chan['Position_Values']
            h5_spec_inds = h5_chan['Spectroscopic_Indices']
            h5_spec_vals = h5_chan['Spectroscopic_Values']

            # Make sure we have correct spectroscopic indices for the given values
            ds_spec_inds = create_spec_inds_from_vals(h5_spec_vals[()])
            if not np.allclose(ds_spec_inds, h5_spec_inds[()]):
                h5_spec_inds[:, :] = ds_spec_inds[:, :]
                h5_file.flush()

            # Get the labels and units for the Spectroscopic datasets
            h5_spec_labels = h5_spec_inds.attrs['labels']
            inds_and_vals = [h5_pos_inds, h5_pos_vals, h5_spec_inds, h5_spec_vals]
            for dset in inds_and_vals:
                spec_labels = dset.attrs['labels']
                try:
                    spec_units = dset.attrs['units']

                    if len(spec_units) != len(spec_labels):
                        raise KeyError

                except KeyError:
                    dset['units'] = ['' for _ in spec_labels]
                except:
                    raise

            for ilabel, label in enumerate(h5_spec_labels):
                label_slice = (slice(ilabel, ilabel + 1), slice(None))
                if label == '':
                    label = 'Step'
                h5_spec_inds.attrs[label] = h5_spec_inds.regionref[label_slice]
                h5_spec_vals.attrs[label] = h5_spec_vals.regionref[label_slice]

            # Link the references to the Indices and Values datasets to the Raw_Data
            link_as_main(h5_raw, h5_pos_inds, h5_pos_vals, h5_spec_inds, h5_spec_vals)

            # Also link the Bin_Frequencies and Bin_Wfm_Type datasets
            h5_freqs = h5_chan['Bin_Frequencies']
            aux_dset_names = ['Bin_Frequencies']
            aux_dset_refs = [h5_freqs.ref]
            check_and_link_ancillary(h5_raw, aux_dset_names, anc_refs=aux_dset_refs)

            '''
            Get all SHO_Fit groups for the Raw_Data and loop over them
            Get the Guess and Spectroscopic Datasets for each SHO_Fit group
            '''
            sho_list = find_results_groups(h5_raw, 'SHO_Fit')
            for h5_sho in sho_list:
                h5_sho_guess = h5_sho['Guess']
                h5_sho_spec_inds = h5_sho['Spectroscopic_Indices']
                h5_sho_spec_vals = h5_sho['Spectroscopic_Values']

                # Make sure we have correct spectroscopic indices for the given values
                ds_sho_spec_inds = create_spec_inds_from_vals(h5_sho_spec_inds[()])
                if not np.allclose(ds_sho_spec_inds, h5_sho_spec_inds[()]):
                    h5_sho_spec_inds[:, :] = ds_sho_spec_inds[:, :]

                # Get the labels and units for the Spectroscopic datasets
                h5_sho_spec_labels = get_attr(h5_sho_spec_inds, 'labels')
                link_as_main(h5_sho_guess, h5_pos_inds, h5_pos_vals, h5_sho_spec_inds, h5_sho_spec_vals)
                sho_inds_and_vals = [h5_sho_spec_inds, h5_sho_spec_vals]

                for dset in sho_inds_and_vals:
                    spec_labels = get_attr(dset, 'labels')
                    try:
                        spec_units = get_attr(dset, 'units')

                        if len(spec_units) != len(spec_labels):
                            raise KeyError

                    except KeyError:
                        spec_units = [''.encode('utf-8') for _ in spec_labels]
                        dset.attrs['units'] = spec_units

                    except:
                        raise

                # Make region references in the
                for ilabel, label in enumerate(h5_sho_spec_labels):
                    label_slice = (slice(ilabel, ilabel + 1), slice(None))
                    if label == '':
                        label = 'Step'.encode('utf-8')
                    h5_sho_spec_inds.attrs[label] = h5_sho_spec_inds.regionref[label_slice]
                    h5_sho_spec_vals.attrs[label] = h5_sho_spec_vals.regionref[label_slice]

            h5_file.flush()

        h5_file.attrs['translator'] = 'V3patcher'.encode('utf-8')

        return h5_file
Ejemplo n.º 7
0
    def _check_for_old_fit(self):
        """
        Returns three lists of h5py.Dataset objects where the group contained:
            1. Completed guess only
            2. Partial Fit
            3. Completed Fit

        Returns
        -------

        """
        # First find all groups that match the basic condition of matching tool name
        all_groups = find_results_groups(self.h5_main, self._fitter_name)
        if self._verbose:
            print('Groups that matched the nomenclature: {}'.format(all_groups))

        # Next sort these groups into three categories:
        completed_guess = []
        partial_fits = []
        completed_fits = []

        for h5_group in all_groups:

            if 'Fit' in h5_group.keys():
                # check group for fit attribute

                h5_fit = h5_group['Fit']

                # check Fit dataset against parms_dict
                if not check_for_matching_attrs(h5_fit, new_parms=self._parms_dict, verbose=self._verbose):
                    if self._verbose:
                        print('{} did not match the given parameters'.format(h5_fit.name))
                    continue

                # sort this dataset:
                try:
                    last_pix = get_attr(h5_fit, 'last_pixel')
                except KeyError:
                    last_pix = None

                # For now skip any fits that are missing 'last_pixel'
                if last_pix is None:
                    continue
                elif last_pix < self.h5_main.shape[0]:
                    partial_fits.append(h5_fit.parent)
                else:
                    completed_fits.append(h5_fit)
            else:
                if 'Guess' in h5_group.keys():
                    h5_guess = h5_group['Guess']

                    # sort this dataset:
                    try:
                        last_pix = get_attr(h5_guess, 'last_pixel')
                    except KeyError:
                        last_pix = None

                    # For now skip any fits that are missing 'last_pixel'
                    if last_pix is None:
                        continue
                    elif last_pix == self.h5_main.shape[0]:
                        if self._verbose:
                            print('{} was a completed Guess'.format(h5_guess.name))
                        completed_guess.append(h5_guess)
                    else:
                        if self._verbose:
                            print('{} did not not have completed Guesses'.format(h5_guess.name))
                else:
                    if self._verbose:
                        print('{} did not even have Guess. Categorizing as defective Group'.format(h5_group.name))

        return completed_guess, partial_fits, completed_fits
Ejemplo n.º 8
0
def rebuild_svd(h5_main, components=None, cores=None, max_RAM_mb=1024):
    """
    Rebuild the Image from the SVD results on the windows
    Optionally, only use components less than n_comp.

    Parameters
    ----------
    h5_main : hdf5 Dataset
        dataset which SVD was performed on
    components : {int, iterable of int, slice} optional
        Defines which components to keep
        Default - None, all components kept

        Input Types
        integer : Components less than the input will be kept
        length 2 iterable of integers : Integers define start and stop of component slice to retain
        other iterable of integers or slice : Selection of component indices to retain
    cores : int, optional
        How many cores should be used to rebuild
        Default - None, all but 2 cores will be used, min 1
    max_RAM_mb : int, optional
        Maximum ammount of memory to use when rebuilding, in Mb.
        Default - 1024Mb

    Returns
    -------
    rebuilt_data : HDF5 Dataset
        the rebuilt dataset

    """
    comp_slice, num_comps = get_component_slice(components, total_components=h5_main.shape[1])
    if isinstance(comp_slice, np.ndarray):
        comp_slice = list(comp_slice)
    dset_name = h5_main.name.split('/')[-1]

    # Ensuring that at least one core is available for use / 2 cores are available for other use
    max_cores = max(1, cpu_count() - 2)
    #         print('max_cores',max_cores)
    if cores is not None:
        cores = min(round(abs(cores)), max_cores)
    else:
        cores = max_cores

    max_memory = min(max_RAM_mb * 1024 ** 2, 0.75 * get_available_memory())
    if cores != 1:
        max_memory = int(max_memory / 2)

    '''
    Get the handles for the SVD results
    '''
    try:
        h5_svd_group = find_results_groups(h5_main, 'SVD')[-1]

        h5_S = h5_svd_group['S']
        h5_U = h5_svd_group['U']
        h5_V = h5_svd_group['V']

    except KeyError:
        raise KeyError('SVD Results for {dset} were not found.'.format(dset=dset_name))
    except:
        raise

    func, is_complex, is_compound, n_features, type_mult = check_dtype(h5_V)

    '''
    Calculate the size of a single batch that will fit in the available memory
    '''
    n_comps = h5_S[comp_slice].size
    mem_per_pix = (h5_U.dtype.itemsize + h5_V.dtype.itemsize * h5_V.shape[1]) * n_comps
    fixed_mem = h5_main.size * h5_main.dtype.itemsize

    if cores is None:
        free_mem = max_memory - fixed_mem
    else:
        free_mem = max_memory * 2 - fixed_mem

    batch_size = int(round(float(free_mem) / mem_per_pix))
    batch_slices = gen_batches(h5_U.shape[0], batch_size)

    print('Reconstructing in batches of {} positions.'.format(batch_size))
    print('Batchs should be {} Mb each.'.format(mem_per_pix * batch_size / 1024.0 ** 2))

    '''
    Loop over all batches.
    '''
    ds_V = np.dot(np.diag(h5_S[comp_slice]), func(h5_V[comp_slice, :]))
    rebuild = np.zeros((h5_main.shape[0], ds_V.shape[1]))
    for ibatch, batch in enumerate(batch_slices):
        rebuild[batch, :] += np.dot(h5_U[batch, comp_slice], ds_V)

    rebuild = stack_real_to_target_dtype(rebuild, h5_V.dtype)

    print('Completed reconstruction of data from SVD results.  Writing to file.')
    '''
    Create the Group and dataset to hold the rebuild data
    '''
    rebuilt_grp = create_indexed_group(h5_svd_group, 'Rebuilt_Data')
    h5_rebuilt = write_main_dataset(rebuilt_grp, rebuild, 'Rebuilt_Data',
                                    get_attr(h5_main, 'quantity'), get_attr(h5_main, 'units'),
                                    None, None,
                                    h5_pos_inds=h5_main.h5_pos_inds, h5_pos_vals=h5_main.h5_pos_vals,
                                    h5_spec_inds=h5_main.h5_spec_inds, h5_spec_vals=h5_main.h5_spec_vals,
                                    chunks=h5_main.chunks, compression=h5_main.compression)

    if isinstance(comp_slice, slice):
        rebuilt_grp.attrs['components_used'] = '{}-{}'.format(comp_slice.start, comp_slice.stop)
    else:
        rebuilt_grp.attrs['components_used'] = components

    copy_attributes(h5_main, h5_rebuilt, skip_refs=False)

    h5_main.file.flush()

    print('Done writing reconstructed data to file.')

    return h5_rebuilt