Example 1
def validate_aux_dset_pair(test_class, h5_group, h5_inds, h5_vals, dim_names, dim_units, inds_matrix,
                           vals_matrix=None, base_name=None, h5_main=None, is_spectral=True):
    if vals_matrix is None:
        vals_matrix = inds_matrix
    if base_name is None:
        if is_spectral:
            base_name = 'Spectroscopic'
        else:
            base_name = 'Position'
    else:
        test_class.assertIsInstance(base_name, (str, unicode))

    for h5_dset, exp_dtype, exp_name, ref_data in zip([h5_inds, h5_vals],
                                                      [INDICES_DTYPE, VALUES_DTYPE],
                                                      [base_name + '_Indices', base_name + '_Values'],
                                                      [inds_matrix, vals_matrix]):
        if isinstance(h5_main, h5py.Dataset):
            test_class.assertEqual(h5_main.file[h5_main.attrs[exp_name]], h5_dset)
        test_class.assertIsInstance(h5_dset, h5py.Dataset)
        test_class.assertEqual(h5_dset.parent, h5_group)
        test_class.assertEqual(h5_dset.name.split('/')[-1], exp_name)
        test_class.assertTrue(np.allclose(ref_data, h5_dset[()]))
        test_class.assertEqual(h5_dset.dtype, exp_dtype)
        test_class.assertTrue(np.all([_ in h5_dset.attrs.keys() for _ in ['labels', 'units']]))
        test_class.assertTrue(np.all([x == y for x, y in zip(dim_names, get_attr(h5_dset, 'labels'))]))
        test_class.assertTrue(np.all([x == y for x, y in zip(dim_units, get_attr(h5_dset, 'units'))]))
        # assert region references
        for dim_ind, curr_name in enumerate(dim_names):
            expected = np.squeeze(ref_data[:, dim_ind])
            if is_spectral:
                expected = np.squeeze(ref_data[dim_ind])
            test_class.assertTrue(np.allclose(expected,
                                              np.squeeze(h5_dset[h5_dset.attrs[curr_name]])))
Example 2
    def setUp(self):
        data_utils.make_beps_file()
        self.orig_labels_order = ['X', 'Y', 'Cycle', 'Bias']
        self.h5_file = h5py.File(data_utils.std_beps_path, mode='r')

        h5_grp = self.h5_file['/Raw_Measurement/']
        self.source_nd_s2f = h5_grp['n_dim_form'][()]
        self.source_nd_f2s = self.source_nd_s2f.transpose(1, 0, 3, 2)
        self.h5_source = USIDataset(h5_grp['source_main'])

        self.pos_dims = []
        self.spec_dims = []

        for dim_name, dim_units in zip(self.h5_source.pos_dim_labels,
                                       hdf_utils.get_attr(self.h5_source.h5_pos_inds, 'units')):
            self.pos_dims.append(
                Dimension(dim_name, dim_units, h5_grp[dim_name][()]))

        for dim_name, dim_units in zip(self.h5_source.spec_dim_labels,
                                       hdf_utils.get_attr(self.h5_source.h5_spec_inds, 'units')):
            self.spec_dims.append(
                Dimension(dim_name, dim_units, h5_grp[dim_name][()]))

        res_grp_0 = h5_grp['source_main-Fitter_000']
        self.results_0_nd_s2f = res_grp_0['n_dim_form'][()]
        self.results_0_nd_f2s = self.results_0_nd_s2f.transpose(1, 0, 3, 2)
        self.h5_compound = USIDataset(res_grp_0['results_main'])

        res_grp_1 = h5_grp['source_main-Fitter_001']
        self.results_1_nd_s2f = res_grp_1['n_dim_form'][()]
        self.results_1_nd_f2s = self.results_1_nd_s2f.transpose(1, 0, 3, 2)
        self.h5_complex = USIDataset(res_grp_1['results_main'])
Example 3
    def test_group_indexing_sequential(self):
        file_path = 'test.h5'
        self.__delete_existing_file(file_path)
        with h5py.File(file_path, mode='w') as h5_f:
            writer = HDFwriter(h5_f)
            micro_group_0 = VirtualGroup('Test_',
                                         attrs={
                                             'att_1': 'string_val',
                                             'att_2': 1.2345
                                         })
            [h5_group_0] = writer.write(micro_group_0)

            _ = writer.write(VirtualGroup('blah'))

            self.assertIsInstance(h5_group_0, h5py.Group)
            self.assertEqual(h5_group_0.name, '/Test_000')
            for key, expected_val in micro_group_0.attrs.items():
                self.assertTrue(
                    np.all(get_attr(h5_group_0, key) == expected_val))

            micro_group_1 = VirtualGroup('Test_',
                                         attrs={
                                             'att_3': [1, 2, 3, 4],
                                             'att_4':
                                             ['str_1', 'str_2', 'str_3']
                                         })
            [h5_group_1] = writer.write(micro_group_1)

            self.assertIsInstance(h5_group_1, h5py.Group)
            self.assertEqual(h5_group_1.name, '/Test_001')
            for key, expected_val in micro_group_1.attrs.items():
                self.assertTrue(
                    np.all(get_attr(h5_group_1, key) == expected_val))

        os.remove(file_path)
Example 4
    def test_group_indexing_simultaneous(self):
        file_path = 'test.h5'
        self.__delete_existing_file(file_path)
        with h5py.File(file_path, mode='w') as h5_f:
            micro_group_0 = VirtualGroup('Test_', attrs={'att_1': 'string_val', 'att_2': 1.2345})
            micro_group_1 = VirtualGroup('Test_', attrs={'att_3': [1, 2, 3, 4], 'att_4': ['str_1', 'str_2', 'str_3']})
            root_group = VirtualGroup('', children=[VirtualGroup('blah'), micro_group_0,
                                                    VirtualGroup('meh'), micro_group_1])

            writer = HDFwriter(h5_f)
            h5_refs_list = writer.write(root_group)

            [h5_group_1] = get_h5_obj_refs(['Test_001'], h5_refs_list)
            [h5_group_0] = get_h5_obj_refs(['Test_000'], h5_refs_list)

            self.assertIsInstance(h5_group_0, h5py.Group)
            self.assertEqual(h5_group_0.name, '/Test_000')
            for key, expected_val in micro_group_0.attrs.items():
                self.assertTrue(np.all(get_attr(h5_group_0, key) == expected_val))

            self.assertIsInstance(h5_group_1, h5py.Group)
            self.assertEqual(h5_group_1.name, '/Test_001')
            for key, expected_val in micro_group_1.attrs.items():
                self.assertTrue(np.all(get_attr(h5_group_1, key) == expected_val))

        os.remove(file_path)
Example 5
    def test_write_dset_under_root(self):
        file_path = 'test.h5'
        self.__delete_existing_file(file_path)
        with h5py.File(file_path, mode='w') as h5_f:

            writer = HDFwriter(h5_f)
            data = np.random.rand(5, 7)
            attrs = {'att_1': 'string_val',
                     'att_2': 1.2345,
                     'att_3': [1, 2, 3, 4],
                     'att_4': ['str_1', 'str_2', 'str_3'],
                     'labels': {'even_rows': (slice(0, None, 2), slice(None)),
                                'odd_rows': (slice(1, None, 2), slice(None))}
                     }
            micro_dset = VirtualDataset('test', data)
            micro_dset.attrs = attrs.copy()
            [h5_dset] = writer.write(micro_dset)
            self.assertIsInstance(h5_dset, h5py.Dataset)

            reg_ref = attrs.pop('labels')

            self.assertEqual(len(h5_dset.attrs), len(attrs) + 1 + len(reg_ref))

            for key, expected_val in attrs.items():
                self.assertTrue(np.all(get_attr(h5_dset, key) == expected_val))

            self.assertTrue(np.all([x in list(reg_ref.keys()) for x in get_attr(h5_dset, 'labels')]))

            expected_data = [data[:None:2], data[1:None:2]]
            written_data = [h5_dset[h5_dset.attrs['even_rows']], h5_dset[h5_dset.attrs['odd_rows']]]

            for exp, act in zip(expected_data, written_data):
                self.assertTrue(np.allclose(exp, act))

        os.remove(file_path)
Example 6
def reshape_from_lines_to_pixels(h5_main, pts_per_cycle, scan_step_x_m=None):
    """
    Breaks up the provided raw G-mode dataset, which is organized only by lines, into pixels

    Parameters
    ----------
    h5_main : h5py.Dataset object
        Reference to the main dataset that contains the raw data that is only broken up by lines
    pts_per_cycle : unsigned int
        Number of points in a single pixel
    scan_step_x_m : float
        Step in meters for pixels

    Returns
    -------
    h5_resh : h5py.Dataset object
        Reference to the main dataset that contains the reshaped data
    """
    if not check_if_main(h5_main):
        raise TypeError('h5_main is not a Main dataset')
    h5_main = USIDataset(h5_main)
    if pts_per_cycle % 1 != 0 or pts_per_cycle < 1:
        raise TypeError('pts_per_cycle should be a positive integer')
    if scan_step_x_m is not None:
        if not isinstance(scan_step_x_m, Number):
            raise TypeError('scan_step_x_m should be a real number')
    else:
        scan_step_x_m = 1

    if h5_main.shape[1] % pts_per_cycle != 0:
        raise ValueError('Error in reshaping the provided dataset to pixels. '
                         'Check points per pixel (pts_per_cycle)')

    num_cols = int(h5_main.shape[1] / pts_per_cycle)

    # TODO: DO NOT assume simple 1 spectral dimension!
    single_ao = np.squeeze(h5_main.h5_spec_vals[:, :pts_per_cycle])

    spec_dims = Dimension(get_attr(h5_main.h5_spec_vals, 'labels')[0],
                          get_attr(h5_main.h5_spec_vals, 'units')[0], single_ao)

    # TODO: DO NOT assume simple 1D in positions!
    pos_dims = [Dimension('X', 'm', np.linspace(0, scan_step_x_m, num_cols)),
                Dimension('Y', 'm', np.linspace(0, h5_main.h5_pos_vals[1, 0], h5_main.shape[0]))]

    h5_group = create_results_group(h5_main, 'Reshape')
    # TODO: Create empty datasets and then write for very large datasets
    h5_resh = write_main_dataset(h5_group, (num_cols * h5_main.shape[0], pts_per_cycle), 'Reshaped_Data',
                                 get_attr(h5_main, 'quantity')[0], get_attr(h5_main, 'units')[0], pos_dims, spec_dims,
                                 chunks=(10, pts_per_cycle), dtype=h5_main.dtype, compression=h5_main.compression)

    # TODO: DON'T write in one shot assuming small datasets fit in memory!
    print('Starting to reshape G-mode line data. Please be patient')
    h5_resh[()] = np.reshape(h5_main[()], (-1, pts_per_cycle))

    print('Finished reshaping G-mode line data to rows and columns')

    return USIDataset(h5_resh)
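
A hedged usage sketch for the function above (the file path, group path and acquisition parameters are placeholders, not values from any real dataset):

import h5py
from pyUSID import USIDataset

with h5py.File('gmode_lines.h5', mode='r+') as h5_file:
    # Hypothetical main dataset in which every row holds one full scan line
    h5_main = USIDataset(h5_file['/Measurement_000/Channel_000/Raw_Data'])
    # Assume each line contains an integer number of 2048-point cycles
    h5_pixels = reshape_from_lines_to_pixels(h5_main, pts_per_cycle=2048,
                                             scan_step_x_m=1e-6)
    print(h5_pixels.shape)  # (num_rows * num_cols, 2048)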
Example 7
    def _write_results_chunk(self):
        """
        Writes the labels and mean response to the h5 file

        Returns
        ---------
        h5_group : HDF5 Group reference
            Reference to the group that contains the clustering results
        """
        print('Writing clustering results to file.')
        num_clusters = self.__mean_resp.shape[0]

        h5_cluster_group = create_results_group(self.h5_main, self.process_name)

        write_simple_attrs(h5_cluster_group, self.parms_dict)
        h5_cluster_group.attrs['last_pixel'] = self.h5_main.shape[0]

        h5_labels = write_main_dataset(h5_cluster_group, np.uint32(self.__labels.reshape([-1, 1])), 'Labels',
                                       'Cluster ID', 'a. u.', None, Dimension('Cluster', 'ID', 1),
                                       h5_pos_inds=self.h5_main.h5_pos_inds, h5_pos_vals=self.h5_main.h5_pos_vals,
                                       aux_spec_prefix='Cluster_', dtype=np.uint32)

        if self.num_comps != self.h5_main.shape[1]:
            '''
            Setup the Spectroscopic Indices and Values for the Mean Response if we didn't use all components
            Note that a sliced spectroscopic matrix may not be contiguous. Let's just lose the spectroscopic data
            for now until a better method is figured out
            '''
            """
            if isinstance(self.data_slice[1], np.ndarray):
                centroid_vals_mat = h5_centroids.h5_spec_vals[self.data_slice[1].tolist()]

            else:
                centroid_vals_mat = h5_centroids.h5_spec_vals[self.data_slice[1]]

            ds_centroid_values.data[0, :] = centroid_vals_mat
            """
            if isinstance(self.data_slice[1], np.ndarray):
                vals_slice = self.data_slice[1].tolist()
            else:
                vals_slice = self.data_slice[1]
            vals = self.h5_main.h5_spec_vals[:, vals_slice].squeeze()
            new_spec = Dimension('Original_Spectral_Index', 'a.u.', vals)
            h5_inds, h5_vals = write_ind_val_dsets(h5_cluster_group, new_spec, is_spectral=True)

        else:
            h5_inds = self.h5_main.h5_spec_inds
            h5_vals = self.h5_main.h5_spec_vals

        # For now, link centroids with default spectroscopic indices and values.
        h5_centroids = write_main_dataset(h5_cluster_group, self.__mean_resp, 'Mean_Response',
                                          get_attr(self.h5_main, 'quantity')[0], get_attr(self.h5_main, 'units')[0],
                                          Dimension('Cluster', 'a. u.', np.arange(num_clusters)), None,
                                          h5_spec_inds=h5_inds, aux_pos_prefix='Mean_Resp_Pos_',
                                          h5_spec_vals=h5_vals)

        return h5_cluster_group
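
A short sketch of how the returned group might be consumed from within the same class (the dataset names 'Labels' and 'Mean_Response' match what this method writes; everything else is illustrative):

import numpy as np

h5_group = self._write_results_chunk()
labels = h5_group['Labels'][()].ravel()      # one cluster ID per position
mean_resp = h5_group['Mean_Response'][()]    # one averaged spectrum per cluster
for clust_id in range(mean_resp.shape[0]):
    print('Cluster {}: {} members'.format(clust_id, np.sum(labels == clust_id)))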
Example 8
    def _is_legal(self, h5_main, variables=None):
        """
        Checks whether or not the provided object can be analyzed by this class.

        Parameters
        ----------
        h5_main : h5py.Dataset instance
            The dataset containing the SHO Fit (not necessarily the dataset directly resulting from SHO fit)
            over which the loop projection, guess, and fit will be performed.
        variables : list(string)
            The dimensions needed to be present in the attributes of h5_main to analyze the data with Model.

        Returns
        -------
        legal : Boolean
            Whether or not this dataset satisfies the necessary conditions for analysis

        """
        if variables is None:
            variables = ['DC_Offset']

        file_data_type = get_attr(h5_main.file, 'data_type')
        meas_grp_name = h5_main.name.split('/')
        h5_meas_grp = h5_main.file[meas_grp_name[1]]
        meas_data_type = get_attr(h5_meas_grp, 'data_type')

        if h5_main.dtype != sho32:
            warn('Provided dataset is not a SHO results dataset.')
            return False

        # Compare the data types case-insensitively
        if meas_data_type.lower() != file_data_type.lower():
            warn('Mismatch between file and Measurement group data types for the chosen dataset.')
            print('File data type is {}.  The data type for Measurement group {} is {}'.format(file_data_type,
                                                                                               h5_meas_grp.name,
                                                                                               meas_data_type))
            return False

        if file_data_type == 'BEPSData':
            if get_attr(h5_meas_grp, 'VS_mode') not in ['DC modulation mode', 'current mode']:
                warn('Provided dataset is not a DC modulation or current mode BEPS dataset')
                return False
            elif get_attr(h5_meas_grp, 'VS_cycle_fraction') != 'full':
                warn('Provided dataset does not have full cycles')
                return False

        elif file_data_type == 'cKPFMData':
            if get_attr(h5_meas_grp, 'VS_mode') != 'cKPFM':
                warn('Provided dataset has an unsupported VS_mode.')
                return False

        return super(BELoopFitter, self)._is_legal(h5_main, variables)
Example 9
    def test_dependent_dim(self):
        with h5py.File(data_utils.relaxation_path, mode='r') as h5_f:
            h5_inds = h5_f[
                '/Measurement_000/Channel_000/Spectroscopic_Indices']
            h5_vals = h5_f['/Measurement_000/Channel_000/Spectroscopic_Values']
            spec_dim_names = hdf_utils.get_attr(h5_inds, 'labels')
            ret_dict = hdf_utils.get_unit_values(h5_inds, h5_vals)
            for dim_ind, dim_name in enumerate(spec_dim_names):
                exp_val = hdf_utils.get_attr(h5_inds,
                                             'unit_vals_dim_' + str(dim_ind))
                act_val = ret_dict[dim_name]
                self.assertTrue(np.allclose(exp_val, act_val))
Example 10
    def test_write_dset_under_existing_group(self):
        file_path = 'test.h5'
        self.__delete_existing_file(file_path)
        with h5py.File(file_path, mode='w') as h5_f:

            writer = HDFwriter(h5_f)
            h5_g = writer._create_group(h5_f, VirtualGroup('test_group'))

            self.assertIsInstance(h5_g, h5py.Group)

            data = np.random.rand(5, 7)
            attrs = {
                'att_1': 'string_val',
                'att_2': 1.2345,
                'att_3': [1, 2, 3, 4],
                'att_4': ['str_1', 'str_2', 'str_3'],
                'labels': {
                    'even_rows': (slice(0, None, 2), slice(None)),
                    'odd_rows': (slice(1, None, 2), slice(None))
                }
            }
            micro_dset = VirtualDataset('test', data, parent='/test_group')
            micro_dset.attrs = attrs.copy()
            [h5_dset] = writer.write(micro_dset)
            self.assertIsInstance(h5_dset, h5py.Dataset)

            self.assertEqual(h5_dset.parent, h5_g)

            reg_ref = attrs.pop('labels')

            self.assertEqual(len(h5_dset.attrs), len(attrs) + 1 + len(reg_ref))

            for key, expected_val in attrs.items():
                self.assertTrue(np.all(get_attr(h5_dset, key) == expected_val))

            self.assertTrue(
                np.all([
                    x in list(reg_ref.keys())
                    for x in get_attr(h5_dset, 'labels')
                ]))

            expected_data = [data[:None:2], data[1:None:2]]
            written_data = [
                h5_dset[h5_dset.attrs['even_rows']],
                h5_dset[h5_dset.attrs['odd_rows']]
            ]

            for exp, act in zip(expected_data, written_data):
                self.assertTrue(np.allclose(exp, act))

        os.remove(file_path)
Example 11
    def get_all_dimensions():
        pos_dims = []
        spec_dims = []
        with h5py.File(test_h5_file_path, mode='r') as h5_f:
            h5_raw_grp = h5_f['Raw_Measurement']
            usi_main = USIDataset(h5_raw_grp['source_main'])
            for dim_name, dim_units in zip(usi_main.pos_dim_labels,
                                           hdf_utils.get_attr(usi_main.h5_pos_inds, 'units')):
                pos_dims.append(Dimension(dim_name, dim_units, h5_raw_grp[dim_name][()]))

            for dim_name, dim_units in zip(usi_main.spec_dim_labels,
                                           hdf_utils.get_attr(
                                               usi_main.h5_spec_inds, 'units')):
                spec_dims.append(Dimension(dim_name, dim_units, h5_raw_grp[dim_name][()]))

        return pos_dims, spec_dims
Example 12
    def _write_results_chunk(self):
        """
        Writes the labels and mean response to the h5 file

        Returns
        ---------
        h5_group : HDF5 Group reference
            Reference to the group that contains the decomposition results
        """

        h5_decomp_group = create_results_group(self.h5_main, self.process_name)
        write_simple_attrs(h5_decomp_group, self.parms_dict)
        write_simple_attrs(h5_decomp_group, {'n_components': self.__components.shape[0],
                                             'n_samples': self.h5_main.shape[0], 'last_pixel': self.h5_main.shape[0]})

        decomp_desc = Dimension('Endmember', 'a. u.', self.__components.shape[0])

        # equivalent to V - compound / complex
        h5_components = write_main_dataset(h5_decomp_group, self.__components, 'Components',
                                           get_attr(self.h5_main, 'quantity')[0], 'a.u.', decomp_desc,
                                           None,
                                           h5_spec_inds=self.h5_main.h5_spec_inds,
                                           h5_spec_vals=self.h5_main.h5_spec_vals)

        # equivalent of U - real
        h5_projections = write_main_dataset(h5_decomp_group, np.float32(self.__projection), 'Projection', 'abundance',
                                            'a.u.', None, decomp_desc, dtype=np.float32,
                                            h5_pos_inds=self.h5_main.h5_pos_inds, h5_pos_vals=self.h5_main.h5_pos_vals)

        # return the h5 group object
        self.h5_results_grp = h5_decomp_group
        return self.h5_results_grp
Example 13
    def _calc_raw(self):
        """

        Returns
        -------

        """
        mem_per_pix = self.n_sho_bins * self.h5_sho_fit.dtype.itemsize + self.n_spec_bins * self.h5_raw.dtype.itemsize
        free_mem = self.max_ram
        batch_size = int(free_mem / mem_per_pix)
        batches = gen_batches(self.n_pixels, batch_size)

        w_vec = self.h5_spec_vals[get_attr(self.h5_spec_vals, 'Frequency')].squeeze()
        w_vec = w_vec[:self.n_bins]

        for pix_batch in batches:
            sho_chunk = self.h5_sho_fit[pix_batch, :].flatten()

            raw_data = np.zeros([sho_chunk.shape[0], self.n_bins], dtype=np.complex64)
            for iparm, sho_parms in enumerate(sho_chunk):
                raw_data[iparm, :] = SHOfunc(sho_parms, w_vec)

            self.h5_raw[pix_batch, :] = raw_data.reshape([-1, self.n_spec_bins])

            self.h5_file.flush()

        return
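
The batching above relies on sklearn.utils.gen_batches, which yields contiguous slice objects covering the requested range; a small self-contained illustration of the same pattern:

import numpy as np
from sklearn.utils import gen_batches

n_pixels, batch_size = 10, 4
for batch in gen_batches(n_pixels, batch_size):
    # yields slice(0, 4), slice(4, 8), slice(8, 10)
    print(batch, np.arange(n_pixels)[batch])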
Example 14
    def test_write_reg_ref_slice_dim_larger_than_data(self):
        file_path = 'test.h5'
        self.__delete_existing_file(file_path)
        with h5py.File(file_path, mode='w') as h5_f:
            writer = HDFwriter(h5_f)
            data = np.random.rand(5, 7)
            h5_dset = writer._create_simple_dset(h5_f, VirtualDataset('test', data))
            self.assertIsInstance(h5_dset, h5py.Dataset)

            attrs = {'labels': {'even_rows': (slice(0, 15, 2), slice(None)),
                                'odd_rows': (slice(1, 15, 2), slice(None))}}

            writer._write_dset_attributes(h5_dset, attrs.copy())
            h5_f.flush()

            # two atts point to region references. one for labels
            self.assertEqual(len(h5_dset.attrs), 1 + len(attrs['labels']))

            # check if the labels attribute was written:

            self.assertTrue(np.all([x in list(attrs['labels'].keys()) for x in get_attr(h5_dset, 'labels')]))

            expected_data = [data[:None:2], data[1:None:2]]
            written_data = [h5_dset[h5_dset.attrs['even_rows']], h5_dset[h5_dset.attrs['odd_rows']]]

            for exp, act in zip(expected_data, written_data):
                self.assertTrue(np.allclose(exp, act))

        os.remove(file_path)
Example 15
    def test_generate_and_write_reg_ref_legal(self):
        file_path = 'test.h5'
        self.__delete_existing_file(file_path)
        with h5py.File(file_path, mode='w') as h5_f:

            writer = HDFwriter(h5_f)
            data = np.random.rand(2, 7)
            h5_dset = writer._create_simple_dset(h5_f, VirtualDataset('test', data))
            self.assertIsInstance(h5_dset, h5py.Dataset)

            attrs = {'labels': ['row_1', 'row_2']}
            if sys.version_info.major == 3:
                with self.assertWarns(UserWarning):
                    writer._write_dset_attributes(h5_dset, attrs.copy())
            else:
                writer._write_dset_attributes(h5_dset, attrs.copy())
            h5_f.flush()

            # two atts point to region references. one for labels
            self.assertEqual(len(h5_dset.attrs), 1 + len(attrs['labels']))

            # check if the labels attribute was written:

            self.assertTrue(np.all([x in list(attrs['labels']) for x in get_attr(h5_dset, 'labels')]))

            expected_data = [data[0], data[1]]
            written_data = [h5_dset[h5_dset.attrs['row_1']], h5_dset[h5_dset.attrs['row_2']]]

            for exp, act in zip(expected_data, written_data):
                self.assertTrue(np.allclose(np.squeeze(exp), np.squeeze(act)))

        os.remove(file_path)
Example 16
    def test_string_representation(self):
        usi_dset = self.h5_source
        h5_main = self.h5_file[usi_dset.name]
        actual = usi_dset.__repr__()
        actual = [line.strip() for line in actual.split("\n")]
        actual = [actual[line_ind] for line_ind in [0, 2, 4, 7, 8, 10, 11]]

        expected = list()
        expected.append(h5_main.__repr__())
        expected.append(h5_main.name)
        expected.append(hdf_utils.get_attr(h5_main, "quantity") + " (" + hdf_utils.get_attr(h5_main, "units") + ")")
        for h5_inds in [usi_dset.h5_pos_inds, usi_dset.h5_spec_inds]:
            for dim_name, dim_size in zip(hdf_utils.get_attr(h5_inds, "labels"),
                                          hdf_utils.get_dimensionality(h5_inds)):
                expected.append(dim_name + ' - size: ' + str(dim_size))
        self.assertTrue(np.all([x == y for x, y in zip(actual, expected)]))
Example 17
    def _check_for_old_guess(self):
        """
        Returns a list of datasets where the same parameters have already been used to compute Guesses for this dataset

        Returns
        -------
        list
            List of datasets with results from do_guess on this dataset
        """
        groups = check_for_old(self.h5_main, self._fitter_name, new_parms=self._parms_dict, target_dset='Guess',
                               verbose=self._verbose)
        datasets = [grp['Guess'] for grp in groups]

        # Now sort these datasets into partial and complete:
        completed_dsets = []
        partial_dsets = []

        for dset in datasets:
            try:
                last_pix = get_attr(dset, 'last_pixel')
            except KeyError:
                last_pix = None
                
            # Skip datasets without last_pixel attribute
            if last_pix is None:
                continue
            elif last_pix < self.h5_main.shape[0]:
                partial_dsets.append(dset)
            else:
                completed_dsets.append(dset)

        return partial_dsets, completed_dsets
Example 18
    def test_write_reg_ref_main_one_dim(self):
        file_path = 'test.h5'
        data_utils.delete_existing_file(file_path)
        data = np.random.rand(7)
        with h5py.File(file_path, mode='w') as h5_f:
            h5_dset = h5_f.create_dataset('Main', data=data)
            reg_refs = {
                'even_rows': (slice(0, None, 2)),
                'odd_rows': (slice(1, None, 2))
            }
            reg_ref.write_region_references(h5_dset,
                                            reg_refs,
                                            add_labels_attr=True)
            self.assertEqual(len(h5_dset.attrs), 1 + len(reg_refs))
            actual = get_attr(h5_dset, 'labels')
            self.assertTrue(
                np.all([
                    x == y for x, y in zip(actual, ['even_rows', 'odd_rows'])
                ]))

            expected_data = [data[0:None:2], data[1:None:2]]
            written_data = [
                h5_dset[h5_dset.attrs['even_rows']],
                h5_dset[h5_dset.attrs['odd_rows']]
            ]

            for exp, act in zip(expected_data, written_data):
                self.assertTrue(np.allclose(exp, act))

        os.remove(file_path)
Example 19
    def _calc_raw(self):
        """

        Returns
        -------

        """
        mem_per_pix = self.n_sho_bins * self.h5_sho_fit.dtype.itemsize + self.n_spec_bins * self.h5_raw.dtype.itemsize
        free_mem = self.max_ram
        batch_size = int(free_mem / mem_per_pix)
        batches = gen_batches(self.n_pixels, batch_size)

        w_vec = self.h5_spec_vals[get_attr(self.h5_spec_vals,
                                           'Frequency')].squeeze()
        w_vec = w_vec[:self.n_bins]

        for pix_batch in batches:
            sho_chunk = self.h5_sho_fit[pix_batch, :].flatten()

            raw_data = np.zeros([sho_chunk.shape[0], self.n_bins],
                                dtype=np.complex64)
            for iparm, sho_parms in enumerate(sho_chunk):
                raw_data[iparm, :] = SHOfunc(sho_parms, w_vec)

            self.h5_raw[pix_batch, :] = raw_data.reshape(
                [-1, self.n_spec_bins])

            self.h5_file.flush()

        return
Example 20
    def _set_guess(self, h5_guess):
        """
        Setup to run the fit on an existing guess dataset.  Sets the attributes
        normally defined during do_guess.

        Parameters
        ----------
        h5_guess : h5py.Dataset
            Dataset object containing the guesses

        """
        '''
        Get the Spectroscopic and Position datasets from `self.h5_main`
        '''
        self._sho_spec_inds = self.h5_main.h5_spec_inds
        self._sho_spec_vals = self.h5_main.h5_spec_vals
        self._sho_pos_inds = self.h5_main.h5_pos_inds

        '''
        Find the Spectroscopic index for the DC_Offset
        '''
        fit_ind = np.argwhere(get_attr(self._sho_spec_vals, 'labels') == self._fit_dim_name).squeeze()
        self._fit_spec_index = fit_ind
        self._fit_offset_index = 1 + fit_ind

        '''
        Get the group and projection datasets
        '''
        self._h5_group = h5_guess.parent
        self.h5_projected_loops = self._h5_group['Projected_Loops']
        self.h5_loop_metrics = self._h5_group['Loop_Metrics']
        self._met_spec_inds = self._h5_group['Loop_Metrics_Indices']

        self.h5_guess = h5_guess
Example 21
    def _check_for_old_guess(self):
        """
        Returns a list of datasets where the same parameters have already been used to compute Guesses for this dataset

        Returns
        -------
        list
            List of datasets with results from do_guess on this dataset
        """
        groups = check_for_old(self.h5_main,
                               self._fitter_name,
                               new_parms=self._parms_dict,
                               target_dset='Guess',
                               verbose=self._verbose)
        datasets = [grp['Guess'] for grp in groups]

        # Now sort these datasets into partial and complete:
        completed_dsets = []
        partial_dsets = []

        for dset in datasets:
            try:
                last_pix = get_attr(dset, 'last_pixel')
            except KeyError:
                last_pix = None

            # Skip datasets without last_pixel attribute
            if last_pix is None:
                continue
            elif last_pix < self.h5_main.shape[0]:
                partial_dsets.append(dset)
            else:
                completed_dsets.append(dset)

        return partial_dsets, completed_dsets
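
A hedged sketch of how a caller within the same class could act on the two lists returned above (the attribute assignments below are illustrative, not the fitter's actual resume logic):

partial_dsets, completed_dsets = self._check_for_old_guess()
if len(completed_dsets) > 0:
    # A Guess computed with identical parameters already exists; reuse it
    self.h5_guess = completed_dsets[-1]
elif len(partial_dsets) > 0:
    # Resume an interrupted Guess from the last position that was written
    self.h5_guess = partial_dsets[-1]
    print('Resuming from position {}'.format(get_attr(self.h5_guess, 'last_pixel')))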
Example 22
    def test_string_representation(self):
        with h5py.File(test_h5_file_path, mode='r') as h5_f:
            h5_main = h5_f['/Raw_Measurement/source_main']
            usi_dset = USIDataset(h5_main)
            actual = usi_dset.__repr__()
            actual = [line.strip() for line in actual.split("\n")]
            actual = [actual[line_ind] for line_ind in [0, 2, 4, 7, 8, 10, 11]]

            expected = list()
            expected.append(h5_main.__repr__())
            expected.append(h5_main.name)
            expected.append(hdf_utils.get_attr(h5_main, "quantity") + " (" + hdf_utils.get_attr(h5_main, "units") + ")")
            for h5_inds in [usi_dset.h5_pos_inds, usi_dset.h5_spec_inds]:
                for dim_name, dim_size in zip(hdf_utils.get_attr(h5_inds, "labels"),
                                              hdf_utils.get_dimensionality(h5_inds)):
                    expected.append(dim_name + ' - size: ' + str(dim_size))
            self.assertTrue(np.all([x == y for x, y in zip(actual, expected)]))
Example 23
    def test_simple_region_ref_copy(self):
        # based on test_hdf_writer.test_write_legal_reg_ref_multi_dim_data()
        file_path = 'test.h5'
        data_utils.delete_existing_file(file_path)
        with h5py.File(file_path, mode='w') as h5_f:
            data = np.random.rand(5, 7)
            h5_orig_dset = h5_f.create_dataset('test', data=data)
            self.assertIsInstance(h5_orig_dset, h5py.Dataset)

            attrs = {
                'labels': {
                    'even_rows': (slice(0, None, 2), slice(None)),
                    'odd_rows': (slice(1, None, 2), slice(None))
                }
            }

            data_utils.write_main_reg_refs(h5_orig_dset, attrs['labels'])
            h5_f.flush()

            # two atts point to region references. one for labels
            self.assertEqual(len(h5_orig_dset.attrs), 1 + len(attrs['labels']))

            # check if the labels attribute was written:

            self.assertTrue(
                np.all([
                    x in list(attrs['labels'].keys())
                    for x in get_attr(h5_orig_dset, 'labels')
                ]))

            expected_data = [data[:None:2], data[1:None:2]]
            written_data = [
                h5_orig_dset[h5_orig_dset.attrs['even_rows']],
                h5_orig_dset[h5_orig_dset.attrs['odd_rows']]
            ]

            for exp, act in zip(expected_data, written_data):
                self.assertTrue(np.allclose(exp, act))

            # Now write a new dataset without the region reference:
            h5_new_dset = h5_f.create_dataset('other', data=data)
            self.assertIsInstance(h5_new_dset, h5py.Dataset)
            h5_f.flush()

            for key in attrs['labels'].keys():
                reg_ref.simple_region_ref_copy(h5_orig_dset, h5_new_dset, key)

            # now check to make sure that this dataset also has the same region references:
            written_data = [
                h5_new_dset[h5_new_dset.attrs['even_rows']],
                h5_new_dset[h5_new_dset.attrs['odd_rows']]
            ]

            for exp, act in zip(expected_data, written_data):
                self.assertTrue(np.allclose(exp, act))

        os.remove(file_path)
Example 24
    def __init__(self, h5_main, **kwargs):
        super(BELoopProjector, self).__init__(h5_main, **kwargs)

        if 'DC_Offset' in self.h5_main.spec_dim_labels:
            self._fit_dim_name = 'DC_Offset'
        elif 'write_bias' in self.h5_main.spec_dim_labels:
            self._fit_dim_name = 'write_bias'
        else:
            raise ValueError('Neither "DC_Offset" nor "write_bias" is a '
                             'spectroscopic dimension in the provided dataset, '
                             'which has dimensions: '
                             '{}.'.format(self.h5_main.spec_dim_labels))

        if 'FORC' in self.h5_main.spec_dim_labels:
            self._forc_dim_name = 'FORC'
        else:
            self._forc_dim_name = 'FORC_Cycle'

        # TODO: Need to catch KeyError s that would be thrown when attempting to access attributes
        file_data_type = get_attr(h5_main.file, 'data_type')
        meas_grp_name = h5_main.name.split('/')
        h5_meas_grp = h5_main.file[meas_grp_name[1]]
        meas_data_type = get_attr(h5_meas_grp, 'data_type')

        if h5_main.dtype != sho32:
            raise TypeError('Provided dataset is not a SHO results dataset.')

        # Compare the data types case-insensitively
        if meas_data_type.lower() != file_data_type.lower():
            message = 'Mismatch between file and Measurement group data types for the chosen dataset.\n'
            message += 'File data type is {}.  The data type for Measurement group {} is {}'.format(
                file_data_type, h5_meas_grp.name, meas_data_type)
            raise ValueError(message)

        if file_data_type == 'BEPSData':
            if get_attr(h5_meas_grp, 'VS_mode') not in [
                    'DC modulation mode', 'current mode'
            ]:
                raise ValueError(
                    'Provided dataset has VS_mode "' +
                    get_attr(h5_meas_grp, 'VS_mode') + '", which is not a '
                    '"DC modulation mode" or "current mode" BEPS dataset')
            elif get_attr(h5_meas_grp, 'VS_cycle_fraction') != 'full':
                raise ValueError('Provided dataset does not have full cycles')

        elif file_data_type == 'cKPFMData':
            if get_attr(h5_meas_grp, 'VS_mode') != 'cKPFM':
                raise ValueError(
                    'Provided dataset has an unsupported VS_mode: "' +
                    get_attr(h5_meas_grp, 'VS_mode') + '"')

        # #####################################################################

        self.process_name = "Loop_Projection"
        self.parms_dict = {'projection_method': 'pycroscopy BE loop model'}
Example 25
    def test_get_indices_for_region_ref_corners(self):
        with h5py.File(data_utils.std_beps_path, mode='r') as h5_f:
            h5_main = h5_f['/Raw_Measurement/source_main']
            ref_in = get_attr(h5_main, 'even_rows')
            ret_val = reg_ref.get_indices_for_region_ref(
                h5_main, ref_in, 'corners')
            expected_pos = np.repeat(np.arange(h5_main.shape[0])[::2], 2)
            expected_spec = np.tile(np.array([0, h5_main.shape[1] - 1]),
                                    expected_pos.size // 2)
            expected_corners = np.vstack((expected_pos, expected_spec)).T
            self.assertTrue(np.allclose(ret_val, expected_corners))
Example 26
    def test_get_indices_for_region_ref_slices(self):
        with h5py.File(data_utils.std_beps_path, mode='r') as h5_f:
            h5_main = h5_f['/Raw_Measurement/source_main']
            ref_in = get_attr(h5_main, 'even_rows')
            ret_val = reg_ref.get_indices_for_region_ref(
                h5_main, ref_in, 'slices')
            spec_slice = slice(0, h5_main.shape[1] - 1, None)
            expected_slices = np.array(
                [[slice(x, x, None), spec_slice]
                 for x in np.arange(h5_main.shape[0])[::2]])
            self.assertTrue(np.all(ret_val == expected_slices))
Example 27
    def test_group_indexing_simultaneous(self):
        file_path = 'test.h5'
        self.__delete_existing_file(file_path)
        with h5py.File(file_path, mode='w') as h5_f:
            micro_group_0 = VirtualGroup('Test_',
                                         attrs={
                                             'att_1': 'string_val',
                                             'att_2': 1.2345
                                         })
            micro_group_1 = VirtualGroup('Test_',
                                         attrs={
                                             'att_3': [1, 2, 3, 4],
                                             'att_4':
                                             ['str_1', 'str_2', 'str_3']
                                         })
            root_group = VirtualGroup('',
                                      children=[
                                          VirtualGroup('blah'), micro_group_0,
                                          VirtualGroup('meh'), micro_group_1
                                      ])

            writer = HDFwriter(h5_f)
            h5_refs_list = writer.write(root_group)

            [h5_group_1] = get_h5_obj_refs(['Test_001'], h5_refs_list)
            [h5_group_0] = get_h5_obj_refs(['Test_000'], h5_refs_list)

            self.assertIsInstance(h5_group_0, h5py.Group)
            self.assertEqual(h5_group_0.name, '/Test_000')
            for key, expected_val in micro_group_0.attrs.items():
                self.assertTrue(
                    np.all(get_attr(h5_group_0, key) == expected_val))

            self.assertIsInstance(h5_group_1, h5py.Group)
            self.assertEqual(h5_group_1.name, '/Test_001')
            for key, expected_val in micro_group_1.attrs.items():
                self.assertTrue(
                    np.all(get_attr(h5_group_1, key) == expected_val))

        os.remove(file_path)
Example 28
    def _write_results_chunk(self):
        """
        Writes the provided SVD results to file

        Parameters
        ----------
        """
        comp_dim = Dimension('Principal Component', 'a. u.', len(self.__s))

        h5_svd_group = create_results_group(self.h5_main, self.process_name,
                                            h5_parent_group=self._h5_target_group)
        self.h5_results_grp = h5_svd_group
        self._write_source_dset_provenance()
        

        write_simple_attrs(h5_svd_group, self.parms_dict)
        write_simple_attrs(h5_svd_group, {'svd_method': 'sklearn-randomized'})

        h5_u = write_main_dataset(h5_svd_group, np.float32(self.__u), 'U', 'Abundance', 'a.u.', None, comp_dim,
                                  h5_pos_inds=self.h5_main.h5_pos_inds, h5_pos_vals=self.h5_main.h5_pos_vals,
                                  dtype=np.float32, chunks=calc_chunks(self.__u.shape, np.float32(0).itemsize))
        # print(get_attr(self.h5_main, 'quantity')[0])
        h5_v = write_main_dataset(h5_svd_group, self.__v, 'V', get_attr(self.h5_main, 'quantity')[0],
                                  'a.u.', comp_dim, None, h5_spec_inds=self.h5_main.h5_spec_inds,
                                  h5_spec_vals=self.h5_main.h5_spec_vals,
                                  chunks=calc_chunks(self.__v.shape, self.h5_main.dtype.itemsize))

        # No point making this 1D dataset a main dataset
        h5_s = h5_svd_group.create_dataset('S', data=np.float32(self.__s))

        '''
        Check h5_main for plot group references.
        Copy them into V if they exist
        '''
        for key in self.h5_main.attrs.keys():
            if '_Plot_Group' not in key:
                continue

            ref_inds = get_indices_for_region_ref(self.h5_main, self.h5_main.attrs[key], return_method='corners')
            ref_inds = ref_inds.reshape([-1, 2, 2])
            ref_inds[:, 1, 0] = h5_v.shape[0] - 1

            svd_ref = create_region_reference(h5_v, ref_inds)

            h5_v.attrs[key] = svd_ref

        # Marking completion:
        self._status_dset_name = 'completed_positions'
        self._h5_status_dset = h5_svd_group.create_dataset(self._status_dset_name,
                                                           data=np.ones(self.h5_main.shape[0], dtype=np.uint8))
        # keeping legacy option:
        h5_svd_group.attrs['last_pixel'] = self.h5_main.shape[0]
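
Because U, S and V are stored as separate datasets, a low-rank reconstruction of the source data can be sketched as below (the dataset names match what this method writes; the component count and the assumption of a numeric, non-compound V are illustrative):

import numpy as np

h5_svd_grp = self.h5_results_grp
u = h5_svd_grp['U'][()]     # (positions, components)
s = h5_svd_grp['S'][()]     # (components,)
v = h5_svd_grp['V'][()]     # (components, spectral points)
k = 8                       # number of components to retain (arbitrary)
data_lowrank = u[:, :k] @ np.diag(s[:k]) @ v[:k, :]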
Example 29
    def test_sparse_samp_no_attr(self):
        # What should the user expect this function to do? Arguably, throw an error.
        # Without the attribute, this function has no way of knowing that it is looking
        # at a sparse sampling case, so it blindly returns the first and second columns of vals.
        with h5py.File(data_utils.sparse_sampling_path, mode='r') as h5_f:
            h5_inds = h5_f['/Measurement_000/Channel_000/Position_Indices']
            h5_vals = h5_f['/Measurement_000/Channel_000/Position_Values']
            dim_names = hdf_utils.get_attr(h5_inds, 'labels')
            ret_dict = hdf_utils.get_unit_values(h5_inds, h5_vals)
            for dim_ind, dim_name in enumerate(dim_names):
                exp_val = h5_vals[:, dim_ind]
                act_val = ret_dict[dim_name]
                self.assertTrue(np.allclose(exp_val, act_val))
Example 30
    def test_legal_03(self):
        attrs = {
            'att_1': 'string_val',
            'att_2': 1.2345,
            'att_3': [1, 2, 3, 4],
            'att_4': ['str_1', 'str_2', 'str_3']
        }
        with h5py.File(data_utils.std_beps_path, mode='r') as h5_f:
            h5_group = h5_f['/Raw_Measurement/source_main-Fitter_000']
            for key, expected_value in attrs.items():
                self.assertTrue(
                    np.all(
                        hdf_utils.get_attr(h5_group, key) == expected_value))
Example 31
    def test_np_array(self):
        file_path = 'test.h5'
        data_utils.delete_existing_file(file_path)
        with h5py.File(file_path, mode='w') as h5_f:

            attrs = {'att_1': np.random.rand(4)}

            hdf_utils.write_simple_attrs(h5_f, attrs)

            for key, expected_val in attrs.items():
                self.assertTrue(
                    np.all(hdf_utils.get_attr(h5_f, key) == expected_val))

        os.remove(file_path)
Example 32
    def _write_results_chunk(self):
        """
        Writes the provided SVD results to file

        Parameters
        ----------
        """
        comp_dim = Dimension('Principal Component', 'a. u.', len(self.__s))

        h5_svd_group = create_results_group(self.h5_main, self.process_name)
        self.h5_results_grp = h5_svd_group

        write_simple_attrs(h5_svd_group, self.parms_dict)
        write_simple_attrs(h5_svd_group, {'svd_method': 'sklearn-randomized'})

        h5_u = write_main_dataset(h5_svd_group, np.float32(self.__u), 'U', 'Abundance', 'a.u.', None, comp_dim,
                                  h5_pos_inds=self.h5_main.h5_pos_inds, h5_pos_vals=self.h5_main.h5_pos_vals,
                                  dtype=np.float32, chunks=calc_chunks(self.__u.shape, np.float32(0).itemsize))
        # print(get_attr(self.h5_main, 'quantity')[0])
        h5_v = write_main_dataset(h5_svd_group, self.__v, 'V', get_attr(self.h5_main, 'quantity')[0],
                                  'a.u.', comp_dim, None, h5_spec_inds=self.h5_main.h5_spec_inds,
                                  h5_spec_vals=self.h5_main.h5_spec_vals,
                                  chunks=calc_chunks(self.__v.shape, self.h5_main.dtype.itemsize))

        # No point making this 1D dataset a main dataset
        h5_s = h5_svd_group.create_dataset('S', data=np.float32(self.__s))

        '''
        Check h5_main for plot group references.
        Copy them into V if they exist
        '''
        for key in self.h5_main.attrs.keys():
            if '_Plot_Group' not in key:
                continue

            ref_inds = get_indices_for_region_ref(self.h5_main, self.h5_main.attrs[key], return_method='corners')
            ref_inds = ref_inds.reshape([-1, 2, 2])
            ref_inds[:, 1, 0] = h5_v.shape[0] - 1

            svd_ref = create_region_reference(h5_v, ref_inds)

            h5_v.attrs[key] = svd_ref

        # Marking completion:
        self._status_dset_name = 'completed_positions'
        self._h5_status_dset = h5_svd_group.create_dataset(self._status_dset_name,
                                                           data=np.ones(self.h5_main.shape[0], dtype=np.uint8))
        # keeping legacy option:
        h5_svd_group.attrs['last_pixel'] = self.h5_main.shape[0]
Example 33
    def _create_guess_datasets(self):
        """
        Creates the h5 group, guess dataset, corresponding spectroscopic datasets and also
        links the guess dataset to the spectroscopic datasets.
        """
        self.h5_results_grp = create_results_group(
            self.h5_main,
            self.process_name,
            h5_parent_group=self._h5_target_group)
        write_simple_attrs(self.h5_results_grp, self.parms_dict)

        # If writing to a new HDF5 file:
        # Add back the data_type attribute - still being used in the visualizer
        if self.h5_results_grp.file != self.h5_main.file:
            write_simple_attrs(
                self.h5_results_grp.file,
                {'data_type': get_attr(self.h5_main.file, 'data_type')})

        ret_vals = write_reduced_anc_dsets(self.h5_results_grp,
                                           self.h5_main.h5_spec_inds,
                                           self.h5_main.h5_spec_vals,
                                           self._fit_dim_name,
                                           verbose=self.verbose)

        h5_sho_inds, h5_sho_vals = ret_vals

        self._h5_guess = write_main_dataset(
            self.h5_results_grp, (self.h5_main.shape[0], self.num_udvs_steps),
            'Guess',
            'SHO',
            'compound',
            None,
            None,
            h5_pos_inds=self.h5_main.h5_pos_inds,
            h5_pos_vals=self.h5_main.h5_pos_vals,
            h5_spec_inds=h5_sho_inds,
            h5_spec_vals=h5_sho_vals,
            chunks=(1, self.num_udvs_steps),
            dtype=sho32,
            main_dset_attrs=self.parms_dict,
            verbose=self.verbose)

        # Does not make sense to propagate region refs - nobody uses them
        # copy_region_refs(self.h5_main, self._h5_guess)

        self._h5_guess.file.flush()

        if self.verbose and self.mpi_rank == 0:
            print('Finished creating Guess dataset')
Example 34
    def test_group_indexing_sequential(self):
        file_path = 'test.h5'
        self.__delete_existing_file(file_path)
        with h5py.File(file_path, mode='w') as h5_f:
            writer = HDFwriter(h5_f)
            micro_group_0 = VirtualGroup('Test_', attrs={'att_1': 'string_val', 'att_2': 1.2345})
            [h5_group_0] = writer.write(micro_group_0)

            _ = writer.write(VirtualGroup('blah'))

            self.assertIsInstance(h5_group_0, h5py.Group)
            self.assertEqual(h5_group_0.name, '/Test_000')
            for key, expected_val in micro_group_0.attrs.items():
                self.assertTrue(np.all(get_attr(h5_group_0, key) == expected_val))

            micro_group_1 = VirtualGroup('Test_', attrs={'att_3': [1, 2, 3, 4], 'att_4': ['str_1', 'str_2', 'str_3']})
            [h5_group_1] = writer.write(micro_group_1)

            self.assertIsInstance(h5_group_1, h5py.Group)
            self.assertEqual(h5_group_1.name, '/Test_001')
            for key, expected_val in micro_group_1.attrs.items():
                self.assertTrue(np.all(get_attr(h5_group_1, key) == expected_val))

        os.remove(file_path)
Example 35
    def test_write_single_group(self):
        file_path = 'test.h5'
        self.__delete_existing_file(file_path)
        with h5py.File(file_path, mode='w') as h5_f:
            attrs = {'att_1': 'string_val',
                     'att_2': 1.2345,
                     'att_3': [1, 2, 3, 4],
                     'att_4': ['str_1', 'str_2', 'str_3']}

            micro_group = VirtualGroup('Test_')
            micro_group.attrs = attrs
            writer = HDFwriter(h5_f)
            [h5_group] = writer.write(micro_group)

            for key, expected_val in attrs.items():
                self.assertTrue(np.all(get_attr(h5_group, key) == expected_val))

        os.remove(file_path)
Example 36
    def _write_results_chunk(self):
        """
        Writes the labels and mean response to the h5 file

        Returns
        ---------
        h5_group : HDF5 Group reference
            Reference to the group that contains the decomposition results
        """

        h5_decomp_group = create_results_group(self.h5_main, self.process_name,
                                               h5_parent_group=self._h5_target_group)
        self._write_source_dset_provenance()
        write_simple_attrs(h5_decomp_group, self.parms_dict)
        write_simple_attrs(h5_decomp_group, {'n_components': self.__components.shape[0],
                                             'n_samples': self.h5_main.shape[0]})

        decomp_desc = Dimension('Endmember', 'a. u.', self.__components.shape[0])

        # equivalent to V - compound / complex
        h5_components = write_main_dataset(h5_decomp_group, self.__components, 'Components',
                                           get_attr(self.h5_main, 'quantity')[0], 'a.u.', decomp_desc,
                                           None,
                                           h5_spec_inds=self.h5_main.h5_spec_inds,
                                           h5_spec_vals=self.h5_main.h5_spec_vals)

        # equivalent of U - real
        h5_projections = write_main_dataset(h5_decomp_group, np.float32(self.__projection), 'Projection', 'abundance',
                                            'a.u.', None, decomp_desc, dtype=np.float32,
                                            h5_pos_inds=self.h5_main.h5_pos_inds, h5_pos_vals=self.h5_main.h5_pos_vals)

        # return the h5 group object
        self.h5_results_grp = h5_decomp_group

        # Marking completion:
        self._status_dset_name = 'completed_positions'
        self._h5_status_dset = h5_decomp_group.create_dataset(self._status_dset_name,
                                                              data=np.ones(self.h5_main.shape[0], dtype=np.uint8))
        # keeping legacy option:
        h5_decomp_group.attrs['last_pixel'] = self.h5_main.shape[0]

        return self.h5_results_grp
Example 37
    def test_write_legal_reg_ref_multi_dim_data_2nd_dim(self):
        file_path = 'test.h5'
        self.__delete_existing_file(file_path)
        with h5py.File(file_path, mode='w') as h5_f:

            writer = HDFwriter(h5_f)
            data = np.random.rand(5, 3)
            h5_dset = writer._create_simple_dset(h5_f,
                                                 VirtualDataset('test', data))
            self.assertIsInstance(h5_dset, h5py.Dataset)

            attrs = {
                'labels': {
                    'even_rows': (slice(None), slice(0, None, 2)),
                    'odd_rows': (slice(None), slice(1, None, 2))
                }
            }

            writer._write_dset_attributes(h5_dset, attrs.copy())
            h5_f.flush()

            # two atts point to region references. one for labels
            self.assertEqual(len(h5_dset.attrs), 1 + len(attrs['labels']))

            # check if the labels attribute was written:

            self.assertTrue(
                np.all([
                    x in list(attrs['labels'].keys())
                    for x in get_attr(h5_dset, 'labels')
                ]))

            expected_data = [data[:, 0:None:2], data[:, 1:None:2]]
            written_data = [
                h5_dset[h5_dset.attrs['even_rows']],
                h5_dset[h5_dset.attrs['odd_rows']]
            ]

            for exp, act in zip(expected_data, written_data):
                self.assertTrue(np.allclose(exp, act))

        os.remove(file_path)
Example 38
    def test_write_single_group(self):
        file_path = 'test.h5'
        self.__delete_existing_file(file_path)
        with h5py.File(file_path, mode='w') as h5_f:
            attrs = {
                'att_1': 'string_val',
                'att_2': 1.2345,
                'att_3': [1, 2, 3, 4],
                'att_4': ['str_1', 'str_2', 'str_3']
            }

            micro_group = VirtualGroup('Test_')
            micro_group.attrs = attrs
            writer = HDFwriter(h5_f)
            [h5_group] = writer.write(micro_group)

            for key, expected_val in attrs.items():
                self.assertTrue(
                    np.all(get_attr(h5_group, key) == expected_val))

        os.remove(file_path)
Example 39
    def test_to_grp(self):
        file_path = 'test.h5'
        data_utils.delete_existing_file(file_path)
        with h5py.File(file_path, mode='w') as h5_f:

            h5_group = h5_f.create_group('Blah')

            attrs = {
                'att_1': 'string_val',
                'att_2': 1.234,
                'att_3': [1, 2, 3.14, 4],
                'att_4': ['s', 'tr', 'str_3']
            }

            hdf_utils.write_simple_attrs(h5_group, attrs)

            for key, expected_val in attrs.items():
                self.assertTrue(
                    np.all(hdf_utils.get_attr(h5_group, key) == expected_val))

        os.remove(file_path)
Example 40
    def test_generate_and_write_reg_ref_legal(self):
        file_path = 'test.h5'
        self.__delete_existing_file(file_path)
        with h5py.File(file_path, mode='w') as h5_f:

            writer = HDFwriter(h5_f)
            data = np.random.rand(2, 7)
            h5_dset = writer._create_simple_dset(h5_f,
                                                 VirtualDataset('test', data))
            self.assertIsInstance(h5_dset, h5py.Dataset)

            attrs = {'labels': ['row_1', 'row_2']}
            if sys.version_info.major == 3:
                with self.assertWarns(UserWarning):
                    writer._write_dset_attributes(h5_dset, attrs.copy())
            else:
                writer._write_dset_attributes(h5_dset, attrs.copy())
            h5_f.flush()

            # one attribute for the 'labels' list plus one region-reference attribute per label
            self.assertEqual(len(h5_dset.attrs), 1 + len(attrs['labels']))

            # check if the labels attribute was written:

            self.assertTrue(
                np.all([
                    x in list(attrs['labels'])
                    for x in get_attr(h5_dset, 'labels')
                ]))

            expected_data = [data[0], data[1]]
            written_data = [
                h5_dset[h5_dset.attrs['row_1']],
                h5_dset[h5_dset.attrs['row_2']]
            ]

            for exp, act in zip(expected_data, written_data):
                self.assertTrue(np.allclose(np.squeeze(exp), np.squeeze(act)))

        os.remove(file_path)
Example no. 41
    def _write_results_chunk(self):
        """
        Writes the decomposition results (components and projections) to the h5 file

        Returns
        ---------
        h5_group : HDF5 Group reference
            Reference to the group that contains the decomposition results
        """

        h5_decomp_group = create_results_group(self.h5_main, self.process_name)
        write_simple_attrs(h5_decomp_group, self.parms_dict)
        write_simple_attrs(h5_decomp_group, {'n_components': self.__components.shape[0],
                                             'n_samples': self.h5_main.shape[0]})

        decomp_desc = Dimension('Endmember', 'a. u.', self.__components.shape[0])

        # equivalent to V - compound / complex
        h5_components = write_main_dataset(h5_decomp_group, self.__components, 'Components',
                                           get_attr(self.h5_main, 'quantity')[0], 'a.u.', decomp_desc,
                                           None,
                                           h5_spec_inds=self.h5_main.h5_spec_inds,
                                           h5_spec_vals=self.h5_main.h5_spec_vals)

        # equivalent of U - real
        h5_projections = write_main_dataset(h5_decomp_group, np.float32(self.__projection), 'Projection', 'abundance',
                                            'a.u.', None, decomp_desc, dtype=np.float32,
                                            h5_pos_inds=self.h5_main.h5_pos_inds, h5_pos_vals=self.h5_main.h5_pos_vals)

        # return the h5 group object
        self.h5_results_grp = h5_decomp_group

        # Marking completion:
        self._status_dset_name = 'completed_positions'
        self._h5_status_dset = h5_decomp_group.create_dataset(self._status_dset_name,
                                                              data=np.ones(self.h5_main.shape[0], dtype=np.uint8))
        # keeping legacy option:
        h5_decomp_group.attrs['last_pixel'] = self.h5_main.shape[0]

        return self.h5_results_grp
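
A short sketch of reading these results back with plain h5py; the file name and group path below are assumptions (the actual group name depends on self.process_name and the indexing suffix):

import h5py

with h5py.File('decomposition_results.h5', 'r') as h5_f:
    h5_grp = h5_f['Measurement_000/Channel_000/Raw_Data-Decomposition_000']  # assumed path
    components = h5_grp['Components'][()]    # V-like matrix, one row per endmember
    projections = h5_grp['Projection'][()]   # U-like matrix, one row per position
    print(h5_grp.attrs['n_components'], components.shape, projections.shape)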
Example no. 42
    def basic_file_validation(self, h5_f):
        self.assertEqual('ImageTranslator',
                         hdf_utils.get_attr(h5_f, 'translator'))

        # First level should have absolutely nothing besides one group
        self.assertEqual(len(h5_f.items()), 1)
        self.assertTrue('Measurement_000' in h5_f.keys())
        h5_meas_grp = h5_f['Measurement_000']
        self.assertIsInstance(h5_meas_grp, h5py.Group)

        # Again, this group should only have one group - Channel_000
        self.assertEqual(len(h5_meas_grp.items()), 1)
        self.assertTrue('Channel_000' in h5_meas_grp.keys())
        h5_chan_grp = h5_meas_grp['Channel_000']
        self.assertIsInstance(h5_chan_grp, h5py.Group)

        # This channel group is not expected to have any custom attributes, but it should contain the main dataset and its four ancillary datasets
        self.assertEqual(len(h5_chan_grp.items()), 5)
        for dset_name in [
                'Raw_Data', 'Position_Indices', 'Position_Values',
                'Spectroscopic_Indices', 'Spectroscopic_Values'
        ]:
            self.assertTrue(dset_name in h5_chan_grp.keys())
            h5_dset = h5_chan_grp[dset_name]
            self.assertIsInstance(h5_dset, h5py.Dataset)

        usid_main = USIDataset(h5_chan_grp['Raw_Data'])

        self.assertIsInstance(usid_main, USIDataset)
        self.assertEqual(usid_main.name.split('/')[-1], 'Raw_Data')
        self.assertEqual(usid_main.parent, h5_chan_grp)

        validate_aux_dset_pair(self,
                               h5_chan_grp,
                               usid_main.h5_spec_inds,
                               usid_main.h5_spec_vals, ['arb'], ['a.u.'],
                               np.atleast_2d([0]),
                               h5_main=usid_main,
                               is_spectral=True)
Example no. 43
    def test_to_dset(self):
        file_path = 'test.h5'
        data_utils.delete_existing_file(file_path)
        with h5py.File(file_path, mode='w') as h5_f:

            h5_dset = h5_f.create_dataset('Test', data=np.arange(3))

            attrs = {
                'att_1': 'string_val',
                'att_2': 1.2345,
                'att_3': [1, 2, 3, 4],
                'att_4': ['str_1', 'str_2', 'str_3']
            }

            hdf_utils.write_simple_attrs(h5_dset, attrs)

            self.assertEqual(len(h5_dset.attrs), len(attrs))

            for key, expected_val in attrs.items():
                self.assertTrue(
                    np.all(hdf_utils.get_attr(h5_dset, key) == expected_val))

        os.remove(file_path)
Example no. 44
    def translate(self, h5_path, force_patch=False, **kwargs):
        """
        Add the needed references and attributes to the h5 file that are not created by the
        LabView data acquisition program.

        Parameters
        ----------
        h5_path : str
            path to the h5 file
        force_patch : bool, optional
            Should the check to see if the file has already been patched be ignored.
            Default False.

        Returns
        -------
        h5_file : h5py.File
            patched hdf5 file

        """
        # Open the file and check if a patch is needed
        h5_file = h5py.File(os.path.abspath(h5_path), 'r+')
        if h5_file.attrs.get('translator') is not None and not force_patch:
            print('File is already Pycroscopy ready.')
            return h5_file

        '''
        Get the list of all Raw_Data Datasets
        Loop over the list and update the needed attributes
        '''
        raw_list = find_dataset(h5_file, 'Raw_Data')
        for h5_raw in raw_list:
            if 'quantity' not in h5_raw.attrs:
                h5_raw.attrs['quantity'] = 'quantity'
            if 'units' not in h5_raw.attrs:
                h5_raw.attrs['units'] = 'a.u.'

            # Grab the channel and measurement group of the data to check some needed attributes
            h5_chan = h5_raw.parent
            try:
                c_type = get_attr(h5_chan, 'channel_type')

            except KeyError:
                warn_str = "'channel_type' was not found as an attribute of {}.\n".format(h5_chan.name)
                warn_str += "If this is BEPS or BELine data from the LabView acquisition software, " + \
                            "please run the following piece of code.  Afterwards, run this function again.\n" + \
                            "CODE: " \
                            "hdf.file['{}'].attrs['channel_type'] = 'BE'".format(h5_chan.name)
                warn(warn_str)
                return h5_file

            except:
                raise

            if c_type != 'BE':
                continue

            h5_meas = h5_chan.parent
            h5_meas.attrs['num_UDVS_steps'] = h5_meas.attrs['num_steps']

            # Get the object handles for the Indices and Values datasets
            h5_pos_inds = h5_chan['Position_Indices']
            h5_pos_vals = h5_chan['Position_Values']
            h5_spec_inds = h5_chan['Spectroscopic_Indices']
            h5_spec_vals = h5_chan['Spectroscopic_Values']

            # Make sure we have correct spectroscopic indices for the given values
            ds_spec_inds = create_spec_inds_from_vals(h5_spec_vals[()])
            if not np.allclose(ds_spec_inds, h5_spec_inds[()]):
                h5_spec_inds[:, :] = ds_spec_inds[:, :]
                h5_file.flush()

            # Get the labels and units for the Spectroscopic datasets
            h5_spec_labels = h5_spec_inds.attrs['labels']
            inds_and_vals = [h5_pos_inds, h5_pos_vals, h5_spec_inds, h5_spec_vals]
            for dset in inds_and_vals:
                spec_labels = dset.attrs['labels']
                try:
                    spec_units = dset.attrs['units']

                    if len(spec_units) != len(spec_labels):
                        raise KeyError

                except KeyError:
                    dset.attrs['units'] = ['' for _ in spec_labels]
                except:
                    raise

            for ilabel, label in enumerate(h5_spec_labels):
                label_slice = (slice(ilabel, ilabel + 1), slice(None))
                if label == '':
                    label = 'Step'
                h5_spec_inds.attrs[label] = h5_spec_inds.regionref[label_slice]
                h5_spec_vals.attrs[label] = h5_spec_vals.regionref[label_slice]

            # Link the references to the Indices and Values datasets to the Raw_Data
            link_as_main(h5_raw, h5_pos_inds, h5_pos_vals, h5_spec_inds, h5_spec_vals)

            # Also link the Bin_Frequencies and Bin_Wfm_Type datasets
            h5_freqs = h5_chan['Bin_Frequencies']
            aux_dset_names = ['Bin_Frequencies']
            aux_dset_refs = [h5_freqs.ref]
            check_and_link_ancillary(h5_raw, aux_dset_names, anc_refs=aux_dset_refs)

            '''
            Get all SHO_Fit groups for the Raw_Data and loop over them
            Get the Guess and Spectroscopic Datasets for each SHO_Fit group
            '''
            sho_list = find_results_groups(h5_raw, 'SHO_Fit')
            for h5_sho in sho_list:
                h5_sho_guess = h5_sho['Guess']
                h5_sho_spec_inds = h5_sho['Spectroscopic_Indices']
                h5_sho_spec_vals = h5_sho['Spectroscopic_Values']

                # Make sure we have correct spectroscopic indices for the given values
                ds_sho_spec_inds = create_spec_inds_from_vals(h5_sho_spec_inds[()])
                if not np.allclose(ds_sho_spec_inds, h5_sho_spec_inds[()]):
                    h5_sho_spec_inds[:, :] = ds_sho_spec_inds[:, :]

                # Get the labels and units for the Spectroscopic datasets
                h5_sho_spec_labels = get_attr(h5_sho_spec_inds, 'labels')
                link_as_main(h5_sho_guess, h5_pos_inds, h5_pos_vals, h5_sho_spec_inds, h5_sho_spec_vals)
                sho_inds_and_vals = [h5_sho_spec_inds, h5_sho_spec_vals]

                for dset in sho_inds_and_vals:
                    spec_labels = get_attr(dset, 'labels')
                    try:
                        spec_units = get_attr(dset, 'units')

                        if len(spec_units) != len(spec_labels):
                            raise KeyError

                    except KeyError:
                        spec_units = [''.encode('utf-8') for _ in spec_labels]
                        dset.attrs['units'] = spec_units

                    except:
                        raise

                # Make region references in the SHO Spectroscopic Indices and Values datasets
                for ilabel, label in enumerate(h5_sho_spec_labels):
                    label_slice = (slice(ilabel, ilabel + 1), slice(None))
                    if label == '':
                        label = 'Step'.encode('utf-8')
                    h5_sho_spec_inds.attrs[label] = h5_sho_spec_inds.regionref[label_slice]
                    h5_sho_spec_vals.attrs[label] = h5_sho_spec_vals.regionref[label_slice]

            h5_file.flush()

        h5_file.attrs['translator'] = 'V3patcher'.encode('utf-8')

        return h5_file
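
A minimal usage sketch for this patcher; the wrapper class name below is an assumption, since only the translate() method is shown here:

# Hypothetical class name wrapping the translate() method above
patcher = LabViewH5Patcher()
h5_file = patcher.translate('old_beps_file.h5', force_patch=False)
print(h5_file.attrs['translator'])   # b'V3patcher' once the patch has been applied
h5_file.close()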
Example no. 45
    def _write_results_chunk(self):
        """
        Writes the labels and mean response to the h5 file

        Returns
        ---------
        h5_group : HDF5 Group reference
            Reference to the group that contains the clustering results
        """
        print('Writing clustering results to file.')
        num_clusters = self.__mean_resp.shape[0]

        h5_cluster_group = create_results_group(self.h5_main, self.process_name)

        write_simple_attrs(h5_cluster_group, self.parms_dict)

        h5_labels = write_main_dataset(h5_cluster_group, np.uint32(self.__labels.reshape([-1, 1])), 'Labels',
                                       'Cluster ID', 'a. u.', None, Dimension('Cluster', 'ID', 1),
                                       h5_pos_inds=self.h5_main.h5_pos_inds, h5_pos_vals=self.h5_main.h5_pos_vals,
                                       aux_spec_prefix='Cluster_', dtype=np.uint32)

        if self.num_comps != self.h5_main.shape[1]:
            '''
            Set up the Spectroscopic Indices and Values for the Mean Response if we didn't use all components
            Note that a sliced spectroscopic matrix may not be contiguous. Let's just lose the spectroscopic data
            for now until a better method is figured out
            '''
            """
            if isinstance(self.data_slice[1], np.ndarray):
                centroid_vals_mat = h5_centroids.h5_spec_vals[self.data_slice[1].tolist()]

            else:
                centroid_vals_mat = h5_centroids.h5_spec_vals[self.data_slice[1]]

            ds_centroid_values.data[0, :] = centroid_vals_mat
            """
            if isinstance(self.data_slice[1], np.ndarray):
                vals_slice = self.data_slice[1].tolist()
            else:
                vals_slice = self.data_slice[1]
            vals = self.h5_main.h5_spec_vals[:, vals_slice].squeeze()
            new_spec = Dimension('Original_Spectral_Index', 'a.u.', vals)
            h5_inds, h5_vals = write_ind_val_dsets(h5_cluster_group, new_spec, is_spectral=True)

        else:
            h5_inds = self.h5_main.h5_spec_inds
            h5_vals = self.h5_main.h5_spec_vals

        # For now, link centroids with default spectroscopic indices and values.
        h5_centroids = write_main_dataset(h5_cluster_group, self.__mean_resp, 'Mean_Response',
                                          get_attr(self.h5_main, 'quantity')[0], get_attr(self.h5_main, 'units')[0],
                                          Dimension('Cluster', 'a. u.', np.arange(num_clusters)), None,
                                          h5_spec_inds=h5_inds, aux_pos_prefix='Mean_Resp_Pos_',
                                          h5_spec_vals=h5_vals)

        # Marking completion:
        self._status_dset_name = 'completed_positions'
        self._h5_status_dset = h5_cluster_group.create_dataset(self._status_dset_name,
                                                               data=np.ones(self.h5_main.shape[0], dtype=np.uint8))
        # keeping legacy option:
        h5_cluster_group.attrs['last_pixel'] = self.h5_main.shape[0]

        return h5_cluster_group
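
As a quick sanity check on the written results, the labels can be read back and tallied per cluster; the file name and group path are assumptions:

import h5py
import numpy as np

with h5py.File('clustered_results.h5', 'r') as h5_f:
    h5_grp = h5_f['Measurement_000/Channel_000/Raw_Data-Cluster_000']  # assumed path
    labels = h5_grp['Labels'][()].ravel()
    # Number of positions assigned to each cluster
    print(dict(zip(*np.unique(labels, return_counts=True))))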
Example no. 46
def rebuild_svd(h5_main, components=None, cores=None, max_RAM_mb=1024):
    """
    Rebuild the Image from the SVD results on the windows
    Optionally, only use components less than n_comp.

    Parameters
    ----------
    h5_main : hdf5 Dataset
        dataset which SVD was performed on
    components : {int, iterable of int, slice} optional
        Defines which components to keep
        Default - None, all components kept

        Input Types
        integer : Components less than the input will be kept
        length 2 iterable of integers : Integers define start and stop of component slice to retain
        other iterable of integers or slice : Selection of component indices to retain
    cores : int, optional
        How many cores should be used to rebuild
        Default - None, all but 2 cores will be used, min 1
    max_RAM_mb : int, optional
        Maximum amount of memory to use when rebuilding, in MB.
        Default - 1024 MB

    Returns
    -------
    rebuilt_data : HDF5 Dataset
        the rebuilt dataset

    """
    comp_slice, num_comps = get_component_slice(components, total_components=h5_main.shape[1])
    if isinstance(comp_slice, np.ndarray):
        comp_slice = list(comp_slice)
    dset_name = h5_main.name.split('/')[-1]

    # Ensure at least one core is used for the rebuild while leaving two cores free for other work
    max_cores = max(1, cpu_count() - 2)
    #         print('max_cores',max_cores)
    if cores is not None:
        cores = min(round(abs(cores)), max_cores)
    else:
        cores = max_cores

    max_memory = min(max_RAM_mb * 1024 ** 2, 0.75 * get_available_memory())
    if cores != 1:
        max_memory = int(max_memory / 2)

    '''
    Get the handles for the SVD results
    '''
    try:
        h5_svd_group = find_results_groups(h5_main, 'SVD')[-1]

        h5_S = h5_svd_group['S']
        h5_U = h5_svd_group['U']
        h5_V = h5_svd_group['V']

    except KeyError:
        raise KeyError('SVD Results for {dset} were not found.'.format(dset=dset_name))
    except:
        raise

    func, is_complex, is_compound, n_features, type_mult = check_dtype(h5_V)

    '''
    Calculate the size of a single batch that will fit in the available memory
    '''
    n_comps = h5_S[comp_slice].size
    mem_per_pix = (h5_U.dtype.itemsize + h5_V.dtype.itemsize * h5_V.shape[1]) * n_comps
    fixed_mem = h5_main.size * h5_main.dtype.itemsize

    if cores is None:
        free_mem = max_memory - fixed_mem
    else:
        free_mem = max_memory * 2 - fixed_mem

    batch_size = int(round(float(free_mem) / mem_per_pix))
    batch_slices = gen_batches(h5_U.shape[0], batch_size)

    print('Reconstructing in batches of {} positions.'.format(batch_size))
    print('Batches should be {} MB each.'.format(mem_per_pix * batch_size / 1024.0 ** 2))

    '''
    Loop over all batches.
    '''
    ds_V = np.dot(np.diag(h5_S[comp_slice]), func(h5_V[comp_slice, :]))
    rebuild = np.zeros((h5_main.shape[0], ds_V.shape[1]))
    for ibatch, batch in enumerate(batch_slices):
        rebuild[batch, :] += np.dot(h5_U[batch, comp_slice], ds_V)

    rebuild = stack_real_to_target_dtype(rebuild, h5_V.dtype)

    print('Completed reconstruction of data from SVD results.  Writing to file.')
    '''
    Create the Group and dataset to hold the rebuild data
    '''
    rebuilt_grp = create_indexed_group(h5_svd_group, 'Rebuilt_Data')
    h5_rebuilt = write_main_dataset(rebuilt_grp, rebuild, 'Rebuilt_Data',
                                    get_attr(h5_main, 'quantity'), get_attr(h5_main, 'units'),
                                    None, None,
                                    h5_pos_inds=h5_main.h5_pos_inds, h5_pos_vals=h5_main.h5_pos_vals,
                                    h5_spec_inds=h5_main.h5_spec_inds, h5_spec_vals=h5_main.h5_spec_vals,
                                    chunks=h5_main.chunks, compression=h5_main.compression)

    if isinstance(comp_slice, slice):
        rebuilt_grp.attrs['components_used'] = '{}-{}'.format(comp_slice.start, comp_slice.stop)
    else:
        rebuilt_grp.attrs['components_used'] = components

    copy_attributes(h5_main, h5_rebuilt, skip_refs=False)

    h5_main.file.flush()

    print('Done writing reconstructed data to file.')

    return h5_rebuilt
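
A minimal call sketch, assuming SVD results already exist for the main dataset; the import path and file layout below are assumptions:

import h5py
from pyUSID import USIDataset
# from pycroscopy.processing.svd_utils import rebuild_svd   # assumed import path

with h5py.File('svd_results.h5', 'r+') as h5_f:                          # illustrative file
    h5_main = USIDataset(h5_f['Measurement_000/Channel_000/Raw_Data'])   # assumed path
    h5_rebuilt = rebuild_svd(h5_main, components=5)   # keep only the first five components
    print(h5_rebuilt.name)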
Example no. 47
    def test_write_simple_tree(self):
        file_path = 'test.h5'
        self.__delete_existing_file(file_path)
        with h5py.File(file_path) as h5_f:

            inner_dset_data = np.random.rand(5, 7)
            inner_dset_attrs = {'att_1': 'string_val',
                                'att_2': 1.2345,
                                'att_3': [1, 2, 3, 4],
                                'att_4': ['str_1', 'str_2', 'str_3'],
                                'labels': {'even_rows': (slice(0, None, 2), slice(None)),
                                           'odd_rows': (slice(1, None, 2), slice(None))}
                                }
            inner_dset = VirtualDataset('inner_dset', inner_dset_data)
            inner_dset.attrs = inner_dset_attrs.copy()

            attrs_inner_grp = {'att_1': 'string_val',
                               'att_2': 1.2345,
                               'att_3': [1, 2, 3, 4],
                               'att_4': ['str_1', 'str_2', 'str_3']}
            inner_group = VirtualGroup('indexed_inner_group_')
            inner_group.attrs = attrs_inner_grp
            inner_group.add_children(inner_dset)

            outer_dset_data = np.random.rand(5, 7)
            outer_dset_attrs = {'att_1': 'string_val',
                                'att_2': 1.2345,
                                'att_3': [1, 2, 3, 4],
                                'att_4': ['str_1', 'str_2', 'str_3'],
                                'labels': {'even_rows': (slice(0, None, 2), slice(None)),
                                           'odd_rows': (slice(1, None, 2), slice(None))}
                                }
            outer_dset = VirtualDataset('test', outer_dset_data, parent='/test_group')
            outer_dset.attrs = outer_dset_attrs.copy()

            attrs_outer_grp = {'att_1': 'string_val',
                               'att_2': 1.2345,
                               'att_3': [1, 2, 3, 4],
                               'att_4': ['str_1', 'str_2', 'str_3']}
            outer_group = VirtualGroup('unindexed_outer_group')
            outer_group.attrs = attrs_outer_grp
            outer_group.add_children([inner_group, outer_dset])

            writer = HDFwriter(h5_f)
            h5_refs_list = writer.write(outer_group)

            # I don't know of a more elegant way to do this:
            [h5_outer_dset] = get_h5_obj_refs([outer_dset.name], h5_refs_list)
            [h5_inner_dset] = get_h5_obj_refs([inner_dset.name], h5_refs_list)
            [h5_outer_group] = get_h5_obj_refs([outer_group.name], h5_refs_list)
            [h5_inner_group] = get_h5_obj_refs(['indexed_inner_group_000'], h5_refs_list)

            self.assertIsInstance(h5_outer_dset, h5py.Dataset)
            self.assertIsInstance(h5_inner_dset, h5py.Dataset)
            self.assertIsInstance(h5_outer_group, h5py.Group)
            self.assertIsInstance(h5_inner_group, h5py.Group)

            # check assertions for the inner dataset first
            self.assertEqual(h5_inner_dset.parent, h5_inner_group)

            reg_ref = inner_dset_attrs.pop('labels')

            self.assertEqual(len(h5_inner_dset.attrs), len(inner_dset_attrs) + 1 + len(reg_ref))

            for key, expected_val in inner_dset_attrs.items():
                self.assertTrue(np.all(get_attr(h5_inner_dset, key) == expected_val))

            self.assertTrue(np.all([x in list(reg_ref.keys()) for x in get_attr(h5_inner_dset, 'labels')]))

            expected_data = [inner_dset_data[:None:2], inner_dset_data[1:None:2]]
            written_data = [h5_inner_dset[h5_inner_dset.attrs['even_rows']], h5_inner_dset[h5_inner_dset.attrs['odd_rows']]]

            for exp, act in zip(expected_data, written_data):
                self.assertTrue(np.allclose(exp, act))

            # check assertions for the inner data group next:
            self.assertEqual(h5_inner_group.parent, h5_outer_group)
            for key, expected_val in attrs_inner_grp.items():
                self.assertTrue(np.all(get_attr(h5_inner_group, key) == expected_val))

            # check the outer dataset next:
            self.assertEqual(h5_outer_dset.parent, h5_outer_group)

            reg_ref = outer_dset_attrs.pop('labels')

            self.assertEqual(len(h5_outer_dset.attrs), len(outer_dset_attrs) + 1 + len(reg_ref))

            for key, expected_val in outer_dset_attrs.items():
                self.assertTrue(np.all(get_attr(h5_outer_dset, key) == expected_val))

            self.assertTrue(np.all([x in list(reg_ref.keys()) for x in get_attr(h5_outer_dset, 'labels')]))

            expected_data = [outer_dset_data[:None:2], outer_dset_data[1:None:2]]
            written_data = [h5_outer_dset[h5_outer_dset.attrs['even_rows']],
                            h5_outer_dset[h5_outer_dset.attrs['odd_rows']]]

            for exp, act in zip(expected_data, written_data):
                self.assertTrue(np.allclose(exp, act))

            # Finally check the outer group:
            self.assertEqual(h5_outer_group.parent, h5_f)
            for key, expected_val in attrs_outer_grp.items():
                self.assertTrue(np.all(get_attr(h5_outer_group, key) == expected_val))

        os.remove(file_path)
Example no. 48
    def _setup_h5(self, data_gen_parms):
        """
        Sets up the HDF5 file structure before doing the actual generation

        Parameters
        ----------
        data_gen_parms : dict
            Dictionary containing the parameters to write to the Measurement Group as attributes

        Returns
        -------

        """

        '''
        Build the group structure down to the channel group
        '''
        # Set up the basic group structure
        root_grp = VirtualGroup('')
        root_parms = generate_dummy_main_parms()
        root_parms['translator'] = 'FAKEBEPS'
        root_parms['data_type'] = data_gen_parms['data_type']
        root_grp.attrs = root_parms

        meas_grp = VirtualGroup('Measurement_')
        chan_grp = VirtualGroup('Channel_')

        meas_grp.attrs.update(data_gen_parms)

        # Create the Position and Spectroscopic datasets for the Raw Data
        ds_pos_inds, ds_pos_vals, ds_spec_inds, ds_spec_vals = self._build_ancillary_datasets()

        raw_chunking = calc_chunks([self.n_pixels,
                                    self.n_spec_bins],
                                   np.complex64(0).itemsize,
                                   unit_chunks=[1, self.n_bins])

        ds_raw_data = VirtualDataset('Raw_Data', data=None,
                                     maxshape=[self.n_pixels, self.n_spec_bins],
                                     dtype=np.complex64,
                                     compression='gzip',
                                     chunking=raw_chunking,
                                     parent=meas_grp)

        chan_grp.add_children([ds_pos_inds, ds_pos_vals, ds_spec_inds, ds_spec_vals,
                               ds_raw_data])
        meas_grp.add_children([chan_grp])
        root_grp.add_children([meas_grp])

        hdf = HDFwriter(self.h5_path)
        hdf.delete()
        h5_refs = hdf.write(root_grp)

        # Delete the MicroDatasets to save memory
        del ds_raw_data, ds_spec_inds, ds_spec_vals, ds_pos_inds, ds_pos_vals

        # Get the file and Raw_Data objects
        h5_raw = get_h5_obj_refs(['Raw_Data'], h5_refs)[0]
        h5_chan_grp = h5_raw.parent

        # Get the Position and Spectroscopic dataset objects
        h5_pos_inds = get_h5_obj_refs(['Position_Indices'], h5_refs)[0]
        h5_pos_vals = get_h5_obj_refs(['Position_Values'], h5_refs)[0]
        h5_spec_inds = get_h5_obj_refs(['Spectroscopic_Indices'], h5_refs)[0]
        h5_spec_vals = get_h5_obj_refs(['Spectroscopic_Values'], h5_refs)[0]

        # Link the Position and Spectroscopic datasets as attributes of Raw_Data
        link_as_main(h5_raw, h5_pos_inds, h5_pos_vals, h5_spec_inds, h5_spec_vals)

        '''
        Build the SHO Group
        '''
        sho_grp = VirtualGroup('Raw_Data-SHO_Fit_', parent=h5_chan_grp.name)

        # Build the Spectroscopic datasets for the SHO Guess and Fit
        sho_spec_starts = np.where(h5_spec_inds[h5_spec_inds.attrs['Frequency']].squeeze() == 0)[0]
        sho_spec_labs = get_attr(h5_spec_inds, 'labels')
        ds_sho_spec_inds, ds_sho_spec_vals = build_reduced_spec_dsets(h5_spec_inds,
                                                                      h5_spec_vals,
                                                                      keep_dim=sho_spec_labs != 'Frequency',
                                                                      step_starts=sho_spec_starts)

        sho_chunking = calc_chunks([self.n_pixels,
                                    self.n_sho_bins],
                                   sho32.itemsize,
                                   unit_chunks=[1, 1])
        ds_sho_fit = VirtualDataset('Fit', data=None,
                                    maxshape=[self.n_pixels, self.n_sho_bins],
                                    dtype=sho32,
                                    compression='gzip',
                                    chunking=sho_chunking,
                                    parent=sho_grp)
        ds_sho_guess = VirtualDataset('Guess', data=None,
                                      maxshape=[self.n_pixels, self.n_sho_bins],
                                      dtype=sho32,
                                      compression='gzip',
                                      chunking=sho_chunking,
                                      parent=sho_grp)

        sho_grp.add_children([ds_sho_fit, ds_sho_guess, ds_sho_spec_inds, ds_sho_spec_vals])

        # Write the SHO group and datasets to the file and delete the MicroDataset objects
        h5_sho_refs = hdf.write(sho_grp)
        del ds_sho_fit, ds_sho_guess, ds_sho_spec_inds, ds_sho_spec_vals

        # Get the dataset handles for the fit and guess
        h5_sho_fit = get_h5_obj_refs(['Fit'], h5_sho_refs)[0]
        h5_sho_guess = get_h5_obj_refs(['Guess'], h5_sho_refs)[0]

        # Get the dataset handles for the SHO Spectroscopic datasets
        h5_sho_spec_inds = get_h5_obj_refs(['Spectroscopic_Indices'], h5_sho_refs)[0]
        h5_sho_spec_vals = get_h5_obj_refs(['Spectroscopic_Values'], h5_sho_refs)[0]

        # Link the Position and Spectroscopic datasets as attributes of the SHO Fit and Guess
        link_as_main(h5_sho_fit, h5_pos_inds, h5_pos_vals, h5_sho_spec_inds, h5_sho_spec_vals)
        link_as_main(h5_sho_guess, h5_pos_inds, h5_pos_vals, h5_sho_spec_inds, h5_sho_spec_vals)

        '''
        Build the loop group
        '''
        loop_grp = VirtualGroup('Fit-Loop_Fit_', parent=h5_sho_fit.parent.name)

        # Build the Spectroscopic datasets for the loops
        loop_spec_starts = np.where(h5_sho_spec_inds[h5_sho_spec_inds.attrs['DC_Offset']].squeeze() == 0)[0]
        loop_spec_labs = get_attr(h5_sho_spec_inds, 'labels')
        ds_loop_spec_inds, ds_loop_spec_vals = build_reduced_spec_dsets(h5_sho_spec_inds,
                                                                        h5_sho_spec_vals,
                                                                        keep_dim=loop_spec_labs != 'DC_Offset',
                                                                        step_starts=loop_spec_starts)

        # Create the loop fit and guess MicroDatasets
        loop_chunking = calc_chunks([self.n_pixels, self.n_loops],
                                    loop_fit32.itemsize,
                                    unit_chunks=[1, 1])
        ds_loop_fit = VirtualDataset('Fit', data=None,
                                     maxshape=[self.n_pixels, self.n_loops],
                                     dtype=loop_fit32,
                                     compression='gzip',
                                     chunking=loop_chunking,
                                     parent=loop_grp)

        ds_loop_guess = VirtualDataset('Guess', data=None,
                                       maxshape=[self.n_pixels, self.n_loops],
                                       dtype=loop_fit32,
                                       compression='gzip',
                                       chunking=loop_chunking,
                                       parent=loop_grp)

        # Add the datasets to the loop group then write it to the file
        loop_grp.add_children([ds_loop_fit, ds_loop_guess, ds_loop_spec_inds, ds_loop_spec_vals])
        h5_loop_refs = hdf.write(loop_grp)

        # Delete the MicroDatasets
        del ds_loop_spec_vals, ds_loop_spec_inds, ds_loop_guess, ds_loop_fit

        # Get the handles to the datasets
        h5_loop_fit = get_h5_obj_refs(['Fit'], h5_loop_refs)[0]
        h5_loop_guess = get_h5_obj_refs(['Guess'], h5_loop_refs)[0]
        h5_loop_spec_inds = get_h5_obj_refs(['Spectroscopic_Indices'], h5_loop_refs)[0]
        h5_loop_spec_vals = get_h5_obj_refs(['Spectroscopic_Values'], h5_loop_refs)[0]

        # Link the Position and Spectroscopic datasets to the Loop Guess and Fit
        link_as_main(h5_loop_fit, h5_pos_inds, h5_pos_vals, h5_loop_spec_inds, h5_loop_spec_vals)
        link_as_main(h5_loop_guess, h5_pos_inds, h5_pos_vals, h5_loop_spec_inds, h5_loop_spec_vals)

        self.h5_raw = USIDataset(h5_raw)
        self.h5_sho_guess = USIDataset(h5_sho_guess)
        self.h5_sho_fit = USIDataset(h5_sho_fit)
        self.h5_loop_guess = USIDataset(h5_loop_guess)
        self.h5_loop_fit = USIDataset(h5_loop_fit)
        self.h5_spec_vals = h5_spec_vals
        self.h5_spec_inds = h5_spec_inds
        self.h5_sho_spec_inds = h5_sho_spec_inds
        self.h5_sho_spec_vals = h5_sho_spec_vals
        self.h5_loop_spec_inds = h5_loop_spec_inds
        self.h5_loop_spec_vals = h5_loop_spec_vals
        self.h5_file = h5_raw.file

        return
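
One way to eyeball the hierarchy this builds is to walk the file with h5py; the indexed names in the comments assume the usual _000 suffixes produced by the writer:

import h5py

with h5py.File('fake_beps.h5', 'r') as h5_f:   # illustrative stand-in for self.h5_path
    h5_f.visit(print)
    # Expected shape of the tree:
    # Measurement_000
    # Measurement_000/Channel_000
    # Measurement_000/Channel_000/Raw_Data
    # Measurement_000/Channel_000/Raw_Data-SHO_Fit_000/Fit
    # Measurement_000/Channel_000/Raw_Data-SHO_Fit_000/Fit-Loop_Fit_000/Fit
    # ...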
Example no. 49
    def _get_sho_chunk_sizes(self, max_mem_mb):
        """
        Calculates the largest number of positions that can be read into memory for a single FORC cycle

        Parameters
        ----------
        max_mem_mb : unsigned int
            Maximum allowable memory in megabytes

        Returns
        -------
        max_pos : unsigned int
            largest number of positions that can be read into memory for a single FORC cycle
        sho_spec_inds_per_forc : unsigned int
            Number of indices in the SHO spectroscopic table that will be used per read
        metrics_spec_inds_per_forc : unsigned int
            Number of indices in the Loop metrics spectroscopic table that will be used per read
        """
        # Step 1: Find number of FORC cycles and repeats (if any), DC steps, and number of loops
        # dc_offset_index = np.argwhere(self._sho_spec_inds.attrs['labels'] == 'DC_Offset').squeeze()
        num_dc_steps = np.unique(self._sho_spec_inds[self._fit_spec_index, :]).size
        all_spec_dims = list(range(self._sho_spec_inds.shape[0]))
        all_spec_dims.remove(self._fit_spec_index)

        # Remove FORC_cycles
        sho_spec_labels = self.h5_main.spec_dim_labels
        has_forcs = 'FORC' in sho_spec_labels or 'FORC_Cycle' in sho_spec_labels
        if has_forcs:
            forc_name = 'FORC' if 'FORC' in sho_spec_labels else 'FORC_Cycle'
            try:
                forc_pos = sho_spec_labels.index(forc_name)
            except Exception:
                raise
            # forc_pos = np.argwhere(sho_spec_labels == forc_name)[0][0]
            self._num_forcs = np.unique(self._sho_spec_inds[forc_pos]).size
            all_spec_dims.remove(forc_pos)

            # Remove FORC_repeats
            has_forc_repeats = 'FORC_repeat' in sho_spec_labels
            if has_forc_repeats:
                try:
                    forc_repeat_pos = sho_spec_labels.index('FORC_repeat')
                except Exception:
                    raise
                # forc_repeat_pos = np.argwhere(sho_spec_labels == 'FORC_repeat')[0][0]
                self._num_forc_repeats = np.unique(self._sho_spec_inds[forc_repeat_pos]).size
                all_spec_dims.remove(forc_repeat_pos)

        # calculate number of loops:
        if len(all_spec_dims) == 0:
            loop_dims = 1
        else:
            loop_dims = get_dimensionality(self._sho_spec_inds, all_spec_dims)
        loops_per_forc = np.prod(loop_dims)

        # Step 2: Calculate the largest number of FORCS and positions that can be read given memory limits:
        size_per_forc = num_dc_steps * loops_per_forc * len(self.h5_main.dtype) * self.h5_main.dtype[0].itemsize
        """
        How we arrive at the number for the overhead (how many times the size of the data-chunk we will use in memory)
        1 for the original data, 1 for data copied to all child processes, 1 for results, 0.5 for fit, guess, misc
        """
        mem_overhead = 3.5
        max_pos = int(max_mem_mb * 1024 ** 2 / (size_per_forc * mem_overhead))
        if self._verbose:
            print('Can read {} of {} pixels given a {} MB memory limit'.format(max_pos,
                                                                               self._sho_pos_inds.shape[0],
                                                                               max_mem_mb))
        self.max_pos = int(min(self._sho_pos_inds.shape[0], max_pos))
        self.sho_spec_inds_per_forc = int(self._sho_spec_inds.shape[1] / self._num_forcs / self._num_forc_repeats)
        self.metrics_spec_inds_per_forc = int(self._met_spec_inds.shape[1] / self._num_forcs / self._num_forc_repeats)

        # Step 3: Read allowed chunk
        self._sho_all_but_forc_inds = list(range(self._sho_spec_inds.shape[0]))
        self._met_all_but_forc_inds = list(range(self._met_spec_inds.shape[0]))
        if self._num_forcs > 1:
            self._sho_all_but_forc_inds.remove(forc_pos)
            met_forc_pos = np.argwhere(get_attr(self._met_spec_inds, 'labels') == forc_name)[0][0]
            self._met_all_but_forc_inds.remove(met_forc_pos)

            if self._num_forc_repeats > 1:
                self._sho_all_but_forc_inds.remove(forc_repeat_pos)
                met_forc_repeat_pos = np.argwhere(get_attr(self._met_spec_inds, 'labels') == 'FORC_repeat')[0][0]
                self._met_all_but_forc_inds.remove(met_forc_repeat_pos)

        return
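
A worked example of the sizing arithmetic above, with made-up numbers:

# Illustrative numbers: 64 DC steps, 2 loops per FORC, a 5-field compound dtype of float32s
num_dc_steps, loops_per_forc, n_fields, itemsize = 64, 2, 5, 4
size_per_forc = num_dc_steps * loops_per_forc * n_fields * itemsize   # 2560 bytes per position
mem_overhead = 3.5
max_mem_mb = 1024
max_pos = int(max_mem_mb * 1024 ** 2 / (size_per_forc * mem_overhead))
print(max_pos)   # 119837 positions fit within a 1 GB budget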
Example no. 50
def plot_cluster_h5_group(h5_group, labels_kwargs=None, centroids_kwargs=None):
    """
    Plots the cluster labels and mean response for each cluster

    Parameters
    ----------
    h5_group : h5py.Datagroup object
        H5 group containing the labels and mean response
    labels_kwargs : dict, optional
        keyword arguments for the labels plot. NOT enabled yet.
    centroids_kwargs : dict, optional
        keyword arguments for the centroids plot. NOT enabled yet.

    Returns
    -------
    fig_labels : figure handle
        Figure containing the labels
    fig_centroids : figure handle
        Figure containing the centroids
    """
    if not isinstance(h5_group, h5py.Group):
        raise TypeError('h5_group should be a h5py.Group')
    h5_labels = USIDataset(h5_group['Labels'])
    h5_centroids = USIDataset(h5_group['Mean_Response'])

    labels_mat = np.squeeze(h5_labels.get_n_dim_form())
    if labels_mat.ndim > 3:
        print('Unable to visualize 4 or more dimensional labels!')
        return None, None
    if labels_mat.ndim == 1:
        fig_labs, axis_labs = plt.subplots(figsize=(5.5, 5))
        axis_labs.plot(h5_labels.get_pos_values(h5_labels.pos_dim_labels[0]), labels_mat)
        axis_labs.set_xlabel(h5_labels.pos_dim_descriptors[0])
        axis_labs.set_ylabel('Cluster index')
        axis_labs.set_title(get_attr(h5_group, 'cluster_algorithm') + ' Labels')
    elif labels_mat.ndim == 2:
        fig_labs, axis_labs = plot_cluster_labels(labels_mat, num_clusters=h5_centroids.shape[0],
                                                  x_label=h5_labels.pos_dim_descriptors[0],
                                                  y_label=h5_labels.pos_dim_descriptors[1],
                                                  x_vec=h5_labels.get_pos_values(h5_labels.pos_dim_labels[0]),
                                                  y_vec=h5_labels.get_pos_values(h5_labels.pos_dim_labels[1]),
                                                  title=get_attr(h5_group, 'cluster_algorithm') + ' Labels')

    # TODO: probably not a great idea to load the entire dataset to memory
    centroids_mat = h5_centroids.get_n_dim_form()
    if len(h5_centroids.spec_dim_labels) == 1:
        legend_mode = 2
        if h5_centroids.shape[0] < 6:
            legend_mode = 1
        fig_cent, axis_cent = plot_cluster_centroids(centroids_mat,
                                                     h5_centroids.get_spec_values(h5_centroids.spec_dim_labels[0]),
                                                     legend_mode=legend_mode,
                                                     x_label=h5_centroids.spec_dim_descriptors[0],
                                                     y_label=h5_centroids.data_descriptor,
                                                     overlayed=h5_centroids.shape[0] < 6,
                                                     title=get_attr(h5_group,
                                                                    'cluster_algorithm') + ' Centroid',
                                                     amp_units=get_attr(h5_centroids, 'units'))
    elif len(h5_centroids.spec_dim_labels) == 2:
        # stack of spectrograms
        if h5_centroids.dtype in [np.complex64, np.complex128, complex]:
            fig_cent, axis_cent = plot_complex_spectra(centroids_mat, subtitle_prefix='Cluster',
                                                       title=get_attr(h5_group, 'cluster_algorithm') + ' Centroid',
                                                       x_label=h5_centroids.spec_dim_descriptors[0],
                                                       y_label=h5_centroids.spec_dim_descriptors[1],
                                                       amp_units=get_attr(h5_centroids, 'units'))
        else:
            fig_cent, axis_cent = plot_map_stack(centroids_mat, color_bar_mode='each', evenly_spaced=True,
                                                 title='Cluster',
                                                 heading=get_attr(h5_group,
                                                                  'cluster_algorithm') + ' Centroid')
    return fig_labs, fig_cent
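
An illustrative call; the group path is an assumption about where the clustering results were written:

import h5py

with h5py.File('clustered_results.h5', 'r') as h5_f:
    h5_grp = h5_f['Measurement_000/Channel_000/Raw_Data-Cluster_000']  # assumed path
    fig_labels, fig_centroids = plot_cluster_h5_group(h5_grp)
    fig_labels.savefig('cluster_labels.png')
    fig_centroids.savefig('cluster_centroids.png')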
Example no. 51
    def _check_for_old_fit(self):
        """
        Returns three lists of h5py.Dataset objects where the group contained:
            1. Completed guess only
            2. Partial Fit
            3. Completed Fit

        Returns
        -------

        """
        # First find all groups that match the basic condition of matching tool name
        all_groups = find_results_groups(self.h5_main, self._fitter_name)
        if self._verbose:
            print('Groups that matched the nomenclature: {}'.format(all_groups))

        # Next sort these groups into three categories:
        completed_guess = []
        partial_fits = []
        completed_fits = []

        for h5_group in all_groups:

            if 'Fit' in h5_group.keys():
                # check group for fit attribute

                h5_fit = h5_group['Fit']

                # check Fit dataset against parms_dict
                if not check_for_matching_attrs(h5_fit, new_parms=self._parms_dict, verbose=self._verbose):
                    if self._verbose:
                        print('{} did not match the given parameters'.format(h5_fit.name))
                    continue

                # sort this dataset:
                try:
                    last_pix = get_attr(h5_fit, 'last_pixel')
                except KeyError:
                    last_pix = None

                # For now skip any fits that are missing 'last_pixel'
                if last_pix is None:
                    continue
                elif last_pix < self.h5_main.shape[0]:
                    partial_fits.append(h5_fit.parent)
                else:
                    completed_fits.append(h5_fit)
            else:
                if 'Guess' in h5_group.keys():
                    h5_guess = h5_group['Guess']

                    # sort this dataset:
                    try:
                        last_pix = get_attr(h5_guess, 'last_pixel')
                    except KeyError:
                        last_pix = None

                    # For now skip any fits that are missing 'last_pixel'
                    if last_pix is None:
                        continue
                    elif last_pix == self.h5_main.shape[0]:
                        if self._verbose:
                            print('{} was a completed Guess'.format(h5_guess.name))
                        completed_guess.append(h5_guess)
                    else:
                        if self._verbose:
                            print('{} did not have completed Guesses'.format(h5_guess.name))
                else:
                    if self._verbose:
                        print('{} did not even have Guess. Categorizing as defective Group'.format(h5_group.name))

        return completed_guess, partial_fits, completed_fits
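
A sketch of how a fitter might act on the three lists returned above; the fitter instance itself is an assumption:

# Hypothetical consumer of the three lists returned by _check_for_old_fit()
completed_guess, partial_fits, completed_fits = fitter._check_for_old_fit()
if completed_fits:
    print('Reusing completed fit:', completed_fits[-1].name)
elif partial_fits:
    print('Resuming partial fit in group:', partial_fits[-1].name)
elif completed_guess:
    print('Starting the fit from a completed guess:', completed_guess[-1].name)
else:
    print('No prior results found; starting from scratch.')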