def test_stack_real_nd_to_target_complex_h5_legal(self): with h5py.File(file_path, mode='r') as h5_f: h5_real = h5_f['real2'] expected = h5_real[:, :, :3] + 1j * h5_real[:, :, 3:] actual = dtype_utils.stack_real_to_target_dtype(h5_real, np.complex) self.assertTrue(np.allclose(actual, expected))
def stack_real_to_target_complex_nd(self): expected = 5 * np.random.rand(2, 3, 5, 8) + 7j * np.random.rand( 2, 3, 5, 8) real_val = np.concatenate( [np.real(expected), np.imag(expected)], axis=3) actual = dtype_utils.stack_real_to_target_dtype(real_val, np.complex) self.assertTrue(np.allclose(actual, expected))
def test_compound_nd(self): num_elems = (2, 3, 5, 7) structured_array = np.zeros(shape=num_elems, dtype=struc_dtype) structured_array['r'] = r_vals = np.random.random(size=num_elems) structured_array['g'] = g_vals = np.random.randint(0, high=1024, size=num_elems) structured_array['b'] = b_vals = np.random.random(size=num_elems) real_val = np.concatenate((r_vals, g_vals, b_vals), axis=len(num_elems) - 1) actual = dtype_utils.stack_real_to_target_dtype(real_val, struc_dtype) self.assertTrue(compare_structured_arrays(actual, structured_array))
def __mean_resp_for_cluster(clust_ind, h5_raw, labels_vec, data_slice, xform_func): # get all pixels with this label targ_pos = np.argwhere(labels_vec == clust_ind) # slice to get the responses for all these pixels, ensure that it's 2d data_chunk = np.atleast_2d(h5_raw[:, data_slice[1]][targ_pos, :]) # transform to real from whatever type it was avg_data = np.mean(xform_func(data_chunk), axis=0, keepdims=True) # transform back to the source data type and insert into the mean response return np.squeeze(stack_real_to_target_dtype(avg_data, h5_raw.dtype))
def test(self, override=False): """ Applies randomised VD to the dataset. This function does NOT write results to the hdf5 file. Call compute() to write to the file. Handles complex, compound datasets such that the V matrix is of the same data-type as the input matrix. Parameters ---------- override : bool, optional. default = False Set to true to recompute results if prior results are available. Else, returns existing results Returns ------- U : :class:`numpy.ndarray` Abundance matrix S : :class:`numpy.ndarray` variance vector V : :class:`numpy.ndarray` eigenvector matrix """ ''' Check if a number of compnents has been set and ensure that the number is less than the minimum axis length of the data. If both conditions are met, use fsvd. If not use the regular svd. C.Smith -- We might need to put a lower limit on num_comps in the future. I don't know enough about svd to be sure. ''' if not override: if isinstance(self.duplicate_h5_groups, list) and len(self.duplicate_h5_groups) > 0: self.h5_results_grp = self.duplicate_h5_groups[-1] print('Returning previously computed results from: {}'.format(self.h5_results_grp.name)) print('set the "override" flag to True to recompute results') return reshape_to_n_dims(self.h5_results_grp['U'])[0], self.h5_results_grp['S'][()], \ reshape_to_n_dims(self.h5_results_grp['V'])[0] self.h5_results_grp = None t1 = time.time() self.__u, self.__s, self.__v = randomized_svd(self.data_transform_func(self.h5_main), self.num_components, n_iter=3) self.__v = stack_real_to_target_dtype(self.__v, self.h5_main.dtype) print('Took {} to compute randomized SVD'.format(format_time(time.time() - t1))) u_mat, success = reshape_to_n_dims(self.__u, h5_pos=self.h5_main.h5_pos_inds, h5_spec=np.expand_dims(np.arange(self.__u.shape[1]), axis=0)) if not success: raise ValueError('Could not reshape U to N-Dimensional dataset! Error:' + success) v_mat, success = reshape_to_n_dims(self.__v, h5_pos=np.expand_dims(np.arange(self.__u.shape[1]), axis=1), h5_spec=self.h5_main.h5_spec_inds) if not success: raise ValueError('Could not reshape V to N-Dimensional dataset! Error:' + success) return u_mat, self.__s, v_mat
def stack_real_to_target_compound_single(self): num_elems = 1 structured_array = np.zeros(shape=num_elems, dtype=struc_dtype) structured_array['r'] = r_vals = np.random.random(size=num_elems) structured_array['g'] = g_vals = np.random.randint(0, high=1024, size=num_elems) structured_array['b'] = b_vals = np.random.random(size=num_elems) real_val = np.concatenate((r_vals, g_vals, b_vals)) actual = dtype_utils.stack_real_to_target_dtype(real_val, struc_dtype) self.assertTrue(compare_structured_arrays(actual, structured_array[0]))
def test(self, override=False): """ Decomposes the hdf5 dataset to calculate the components and projection. This function does NOT write results to the hdf5 file. Call :meth:`~pycroscopy.processing.Decomposition.compute()` to write to the file. Handles complex, compound datasets such that the components are of the same data-type as the input matrix. Parameters ---------- override : bool, optional. default = False Set to true to recompute results if prior results are available. Else, returns existing results Returns ------- components : :class:`numpy.ndarray` Components projections : :class:`numpy.ndarray` Projections """ if not override: if isinstance(self.duplicate_h5_groups, list) and len(self.duplicate_h5_groups) > 0: self.h5_results_grp = self.duplicate_h5_groups[-1] print('Returning previously computed results from: {}'.format(self.h5_results_grp.name)) print('set the "override" flag to True to recompute results') return USIDataset(self.h5_results_grp['Components']).get_n_dim_form(), \ USIDataset(self.h5_results_grp['Projection']).get_n_dim_form() self.h5_results_grp = None print('Performing Decomposition on {}.'.format(self.h5_main.name)) t0 = time.time() self._fit() self._transform() print('Took {} to compute {}'.format(format_time(time.time() - t0), self.method_name)) self.__components = stack_real_to_target_dtype(self.estimator.components_, self.h5_main.dtype) projection_mat, success = reshape_to_n_dims(self.__projection, h5_pos=self.h5_main.h5_pos_inds, h5_spec=np.expand_dims(np.arange(self.__projection.shape[1]), axis=0)) if not success: raise ValueError('Could not reshape projections to N-Dimensional dataset! Error:' + success) components_mat, success = reshape_to_n_dims(self.__components, h5_spec=self.h5_main.h5_spec_inds, h5_pos=np.expand_dims(np.arange(self.__components.shape[0]), axis=1)) if not success: raise ValueError('Could not reshape components to N-Dimensional dataset! Error:' + success) return components_mat, projection_mat
def rebuild_svd(h5_main, components=None, cores=None, max_RAM_mb=1024): """ Rebuild the Image from the SVD results on the windows Optionally, only use components less than n_comp. Parameters ---------- h5_main : hdf5 Dataset dataset which SVD was performed on components : {int, iterable of int, slice} optional Defines which components to keep Default - None, all components kept Input Types integer : Components less than the input will be kept length 2 iterable of integers : Integers define start and stop of component slice to retain other iterable of integers or slice : Selection of component indices to retain cores : int, optional How many cores should be used to rebuild Default - None, all but 2 cores will be used, min 1 max_RAM_mb : int, optional Maximum ammount of memory to use when rebuilding, in Mb. Default - 1024Mb Returns ------- rebuilt_data : HDF5 Dataset the rebuilt dataset """ comp_slice, num_comps = get_component_slice( components, total_components=h5_main.shape[1]) if isinstance(comp_slice, np.ndarray): comp_slice = list(comp_slice) dset_name = h5_main.name.split('/')[-1] # Ensuring that at least one core is available for use / 2 cores are available for other use max_cores = max(1, cpu_count() - 2) # print('max_cores',max_cores) if cores is not None: cores = min(round(abs(cores)), max_cores) else: cores = max_cores max_memory = min(max_RAM_mb * 1024**2, 0.75 * get_available_memory()) if cores != 1: max_memory = int(max_memory / 2) ''' Get the handles for the SVD results ''' try: h5_svd_group = find_results_groups(h5_main, 'SVD')[-1] h5_S = h5_svd_group['S'] h5_U = h5_svd_group['U'] h5_V = h5_svd_group['V'] except KeyError: raise KeyError( 'SVD Results for {dset} were not found.'.format(dset=dset_name)) except: raise func, is_complex, is_compound, n_features, type_mult = check_dtype(h5_V) ''' Calculate the size of a single batch that will fit in the available memory ''' n_comps = h5_S[comp_slice].size mem_per_pix = (h5_U.dtype.itemsize + h5_V.dtype.itemsize * h5_V.shape[1]) * n_comps fixed_mem = h5_main.size * h5_main.dtype.itemsize if cores is None: free_mem = max_memory - fixed_mem else: free_mem = max_memory * 2 - fixed_mem batch_size = int(round(float(free_mem) / mem_per_pix)) batch_slices = gen_batches(h5_U.shape[0], batch_size) print('Reconstructing in batches of {} positions.'.format(batch_size)) print('Batchs should be {} Mb each.'.format(mem_per_pix * batch_size / 1024.0**2)) ''' Loop over all batches. ''' ds_V = np.dot(np.diag(h5_S[comp_slice]), func(h5_V[comp_slice, :])) rebuild = np.zeros((h5_main.shape[0], ds_V.shape[1])) for ibatch, batch in enumerate(batch_slices): rebuild[batch, :] += np.dot(h5_U[batch, comp_slice], ds_V) rebuild = stack_real_to_target_dtype(rebuild, h5_V.dtype) print( 'Completed reconstruction of data from SVD results. Writing to file.') ''' Create the Group and dataset to hold the rebuild data ''' rebuilt_grp = create_indexed_group(h5_svd_group, 'Rebuilt_Data') h5_rebuilt = write_main_dataset(rebuilt_grp, rebuild, 'Rebuilt_Data', get_attr(h5_main, 'quantity'), get_attr(h5_main, 'units'), None, None, h5_pos_inds=h5_main.h5_pos_inds, h5_pos_vals=h5_main.h5_pos_vals, h5_spec_inds=h5_main.h5_spec_inds, h5_spec_vals=h5_main.h5_spec_vals, chunks=h5_main.chunks, compression=h5_main.compression) if isinstance(comp_slice, slice): rebuilt_grp.attrs['components_used'] = '{}-{}'.format( comp_slice.start, comp_slice.stop) else: rebuilt_grp.attrs['components_used'] = components copy_attributes(h5_main, h5_rebuilt, skip_refs=False) h5_main.file.flush() print('Done writing reconstructed data to file.') return h5_rebuilt
def test_cast_str_type(self): uint_array = np.random.randint(0, high=15, size=(3, 4, 5), dtype=np.uint16) actual = dtype_utils.stack_real_to_target_dtype(uint_array, np.dtype('<f4')) self.assertTrue(np.allclose(actual, np.float32(uint_array)))
def test_complex_single(self): expected = 4.32 + 5.67j real_val = [np.real(expected), np.imag(expected)] actual = dtype_utils.stack_real_to_target_dtype(real_val, np.complex) self.assertTrue(np.allclose(actual, expected))
def rebuild_svd(h5_main, components=None, cores=None, max_RAM_mb=1024): """ Rebuild the Image from the SVD results on the windows Optionally, only use components less than n_comp. Parameters ---------- h5_main : hdf5 Dataset dataset which SVD was performed on components : {int, iterable of int, slice} optional Defines which components to keep Default - None, all components kept Input Types integer : Components less than the input will be kept length 2 iterable of integers : Integers define start and stop of component slice to retain other iterable of integers or slice : Selection of component indices to retain cores : int, optional How many cores should be used to rebuild Default - None, all but 2 cores will be used, min 1 max_RAM_mb : int, optional Maximum ammount of memory to use when rebuilding, in Mb. Default - 1024Mb Returns ------- rebuilt_data : HDF5 Dataset the rebuilt dataset """ comp_slice, num_comps = get_component_slice(components, total_components=h5_main.shape[1]) if isinstance(comp_slice, np.ndarray): comp_slice = list(comp_slice) dset_name = h5_main.name.split('/')[-1] # Ensuring that at least one core is available for use / 2 cores are available for other use max_cores = max(1, cpu_count() - 2) # print('max_cores',max_cores) if cores is not None: cores = min(round(abs(cores)), max_cores) else: cores = max_cores max_memory = min(max_RAM_mb * 1024 ** 2, 0.75 * get_available_memory()) if cores != 1: max_memory = int(max_memory / 2) ''' Get the handles for the SVD results ''' try: h5_svd_group = find_results_groups(h5_main, 'SVD')[-1] h5_S = h5_svd_group['S'] h5_U = h5_svd_group['U'] h5_V = h5_svd_group['V'] except KeyError: raise KeyError('SVD Results for {dset} were not found.'.format(dset=dset_name)) except: raise func, is_complex, is_compound, n_features, type_mult = check_dtype(h5_V) ''' Calculate the size of a single batch that will fit in the available memory ''' n_comps = h5_S[comp_slice].size mem_per_pix = (h5_U.dtype.itemsize + h5_V.dtype.itemsize * h5_V.shape[1]) * n_comps fixed_mem = h5_main.size * h5_main.dtype.itemsize if cores is None: free_mem = max_memory - fixed_mem else: free_mem = max_memory * 2 - fixed_mem batch_size = int(round(float(free_mem) / mem_per_pix)) batch_slices = gen_batches(h5_U.shape[0], batch_size) print('Reconstructing in batches of {} positions.'.format(batch_size)) print('Batchs should be {} Mb each.'.format(mem_per_pix * batch_size / 1024.0 ** 2)) ''' Loop over all batches. ''' ds_V = np.dot(np.diag(h5_S[comp_slice]), func(h5_V[comp_slice, :])) rebuild = np.zeros((h5_main.shape[0], ds_V.shape[1])) for ibatch, batch in enumerate(batch_slices): rebuild[batch, :] += np.dot(h5_U[batch, comp_slice], ds_V) rebuild = stack_real_to_target_dtype(rebuild, h5_V.dtype) print('Completed reconstruction of data from SVD results. Writing to file.') ''' Create the Group and dataset to hold the rebuild data ''' rebuilt_grp = create_indexed_group(h5_svd_group, 'Rebuilt_Data') h5_rebuilt = write_main_dataset(rebuilt_grp, rebuild, 'Rebuilt_Data', get_attr(h5_main, 'quantity'), get_attr(h5_main, 'units'), None, None, h5_pos_inds=h5_main.h5_pos_inds, h5_pos_vals=h5_main.h5_pos_vals, h5_spec_inds=h5_main.h5_spec_inds, h5_spec_vals=h5_main.h5_spec_vals, chunks=h5_main.chunks, compression=h5_main.compression) if isinstance(comp_slice, slice): rebuilt_grp.attrs['components_used'] = '{}-{}'.format(comp_slice.start, comp_slice.stop) else: rebuilt_grp.attrs['components_used'] = components copy_attributes(h5_main, h5_rebuilt, skip_refs=False) h5_main.file.flush() print('Done writing reconstructed data to file.') return h5_rebuilt