def test_subrun_second():
    """Logs can only be assigned after the sub-runs have been declared."""
    logs = SampleLogs()

    # wrong order: no sub-runs defined yet, so assigning a log must raise
    with pytest.raises(RuntimeError):
        logs['variable1'] = np.linspace(0., 100., 5)

    # correct order: declare the sub-runs first, then set the log
    logs.subruns = [1, 2, 3, 4, 5]
    logs['variable1'] = np.linspace(0., 100., 5)
def test_reassign_subruns():
    """Re-assigning sub-runs is only allowed when the values are unchanged."""
    logs = SampleLogs()
    logs.subruns = [1, 2, 3, 4]
    logs.subruns = [1, 2, 3, 4]  # setting same value is fine

    # dropping an entry or reordering the list must both raise
    for bad_subruns in ([1, 3, 4], [4, 3, 2, 1]):
        with pytest.raises(RuntimeError):
            logs.subruns = bad_subruns
def test_one():
    """Exercise a SampleLogs that holds a single sub-run."""
    logs = SampleLogs()
    logs.subruns = 1
    assert len(logs) == 0

    logs['variable1'] = 27
    assert len(logs) == 1

    # a one-sub-run log cannot accept a two-element value
    with pytest.raises(ValueError):
        logs['variable1'] = [27, 28]

    assert sorted(logs.plottable_logs()) == ['sub-runs', 'variable1']
    assert logs.constant_logs() == ['variable1']
def read_sample_logs(self):
    """Get sample logs

    Retrieve all the (sample) logs from Hidra project file.
    Raw information retrieved from rs project file is numpy arrays

    Returns
    -------
    ndarray, dict
        ndarray : 1D array for sub runs
        dict : dict[sample log name] for sample logs in ndarray
    """
    # Get the HDF5 group holding the sample logs
    logs_group = self._project_h5[HidraConstants.RAW_DATA][
        HidraConstants.SAMPLE_LOGS]

    if HidraConstants.SUB_RUNS not in logs_group.keys():
        raise RuntimeError(
            'Failed to find {} in {} group of the file'.format(
                HidraConstants.SUB_RUNS, HidraConstants.SAMPLE_LOGS))

    samplelogs = SampleLogs()

    # sub runs must be set first so the log arrays can be validated against them
    samplelogs[HidraConstants.SUB_RUNS] = logs_group[
        HidraConstants.SUB_RUNS].value

    for log_name in logs_group.keys():
        if log_name == HidraConstants.SUB_RUNS:
            # already set above; skip the redundant re-assignment
            continue
        samplelogs[log_name] = logs_group[log_name].value

    return samplelogs
def read_sample_logs(self):
    """Get sample logs

    Retrieve all the (sample) logs from Hidra project file.
    Raw information retrieved from rs project file is numpy arrays

    Returns
    -------
    ndarray, dict
        ndarray : 1D array for sub runs
        dict : dict[sample log name] for sample logs in ndarray
    """
    # Locate the sample-log group inside the raw-data section
    logs_group = self._project_h5[HidraConstants.RAW_DATA][
        HidraConstants.SAMPLE_LOGS]

    if HidraConstants.SUB_RUNS not in logs_group.keys():
        raise RuntimeError(
            'Failed to find {} in {} group of the file'.format(
                HidraConstants.SUB_RUNS, HidraConstants.SAMPLE_LOGS))

    samplelogs = SampleLogs()

    # sub runs have to be registered before any other log can be added
    samplelogs[HidraConstants.SUB_RUNS] = logs_group[
        HidraConstants.SUB_RUNS].value

    for log_name in logs_group.keys():
        data_set = logs_group[log_name]  # an instance of HDF5::DataSet
        try:
            samplelogs[log_name, data_set.attrs['units']] = data_set.value
        except KeyError:
            # this log entry has no units. True for old project files
            samplelogs[log_name] = data_set.value

    return samplelogs
def test_multi():
    """Exercise a SampleLogs with several logs over five sub-runs."""
    logs = SampleLogs()
    logs.subruns = [1, 2, 3, 4, 5]
    logs['constant1'] = np.zeros(5) + 42
    logs['variable1'] = np.linspace(0., 100., 5)
    logs['string1'] = np.array(
        ['a'] * logs.subruns.size)  # will be constant as well

    # names of logs: strings are never plottable; constants are detected
    assert sorted(
        logs.plottable_logs()) == ['constant1', 'sub-runs', 'variable1']
    assert sorted(logs.constant_logs()) == ['constant1', 'string1']

    # slicing is by sub-run number, not by array index
    np.testing.assert_equal(logs['variable1'], [0., 25., 50., 75., 100.])
    np.testing.assert_equal(logs['variable1', 3], [50.])
    np.testing.assert_equal(logs['variable1', [1, 2, 3]], [0., 25., 50.])

    # sub-run numbers that were never declared must raise
    for missing_subrun in ([0], [10]):
        with pytest.raises(IndexError):
            logs['variable1', missing_subrun]
def __init__(self, name='hidradata'):
    """Initialize an empty workspace.

    Parameters
    ----------
    name : str
        workspace name
    """
    self._name = name  # workspace name

    # raw counts: dict [sub-run] = count vector
    self._raw_counts = dict()

    # wave length: single value for all sub runs plus optional
    # per-sub-run dictionaries (native and calibrated)
    self._wave_length = None
    self._wave_length_dict = None
    self._wave_length_calibrated_dict = None

    # diffraction
    # ndarray. shape = (m, ) m = number of 2theta
    self._2theta_matrix = None
    # [mask id] = ndarray: shape=(n, m), n: number of sub-run, m: number of 2theta
    self._diff_data_set = dict()
    self._var_data_set = dict()

    # instrument setup and geometry shift
    self._instrument_setup = None
    self._instrument_geometry_shift = None

    # sample logs: [log name, sub run] = value
    self._sample_logs = SampleLogs()

    # associated raw Hidra project file
    self._project_file_name = None
    self._project_file = None

    # detector masks
    self._default_mask = None
    self._mask_dict = dict()
class HidraWorkspace(object):
    """
    This workspace is the central data structure to manage all the raw and/or processed data.
    It provides
    - container for raw counts
    - container for reduced diffraction data
    - container for fitted peaks' parameters
    - container for instrument information
    """

    def __init__(self, name='hidradata'):
        """ initialization """
        # workspace name
        self._name = name
        # raw counts
        self._raw_counts = dict()  # dict [sub-run] = count vector
        # wave length
        self._wave_length = None  # single wave length for all sub runs
        self._wave_length_dict = None
        self._wave_length_calibrated_dict = None
        # diffraction
        self._2theta_matrix = None  # ndarray. shape = (m, ) m = number of 2theta
        # [mask id] = ndarray: shape=(n, m), n: number of sub-run, m: number of of 2theta
        self._diff_data_set = dict()
        # [mask id] = ndarray: shape=(n, m), n: number of sub-run, m: number of of 2theta
        self._var_data_set = dict()
        # instrument
        self._instrument_setup = None
        self._instrument_geometry_shift = None  # geometry shift
        # sample logs
        self._sample_logs = SampleLogs()  # sample logs: [log name, sub run] = value
        # raw Hidra project file
        self._project_file_name = None
        self._project_file = None
        # Masks
        self._default_mask = None
        self._mask_dict = dict()

    @property
    def name(self):
        """ Workspace name
        :return:
        """
        return self._name

    @property
    def hidra_project_file(self):
        """Name of the associated HiDRA project file

        Returns
        -------
        str or None
            file name recorded by load_hidra_project; None before any load
        """
        return self._project_file_name

    def _load_raw_counts(self, hidra_file):
        """ Load raw detector counts from HIDRA file
        :param hidra_file:
        :return:
        """
        checkdatatypes.check_type('HIDRA project file', hidra_file, HidraProjectFile)

        # one counts vector per sub run already registered in the sample logs
        for sub_run_i in self._sample_logs.subruns:
            counts_vec_i = hidra_file.read_raw_counts(sub_run_i)
            self._raw_counts[sub_run_i] = counts_vec_i
        # END-FOR

        return

    def _load_reduced_diffraction_data(self, hidra_file):
        """ Load reduced diffraction data from HIDRA file
        :param hidra_file: HidraProjectFile instance
        :return:
        """
        # Check inputs
        checkdatatypes.check_type('HIDRA project file', hidra_file, HidraProjectFile)

        # get 2theta value
        try:
            vec_2theta = hidra_file.read_diffraction_2theta_array()
        except KeyError as key_err:
            # best-effort: a file without reduced data is not an error
            print('[INFO] Unable to load 2theta vector from HidraProject file due to {}.'
                  'It is very likely that no reduced data is recorded.'
                  ''.format(key_err))
            return
        # TRY-CATCH

        # Get number of spectra
        num_spec = len(hidra_file.read_sub_runs())

        # Promote to 2theta from vector to array
        if len(vec_2theta.shape) == 1:
            # convert from 1D array to 2D: replicate the row once per spectrum
            tth_size = vec_2theta.shape[0]
            matrix_2theta = numpy.repeat(vec_2theta.reshape(1, tth_size),
                                         num_spec, axis=0)
        else:
            matrix_2theta = vec_2theta

        # Set value
        self._2theta_matrix = numpy.copy(matrix_2theta)

        # initialize data set for reduced diffraction patterns
        diff_mask_list = hidra_file.read_diffraction_masks()
        for mask_name in diff_mask_list:
            # 'main' in the file maps to the None (default) mask id in memory
            if mask_name == 'main':
                mask_name = None
            self._diff_data_set[mask_name] = numpy.ndarray(
                shape=(num_spec, vec_2theta.shape[0]), dtype='float')
        # END-FOR

        # Load data: all including masks / ROI
        for mask_name in diff_mask_list:
            # force to None
            if mask_name == 'main':
                mask_name = None
            self._diff_data_set[mask_name] = hidra_file.read_diffraction_intensity_vector(
                mask_id=mask_name, sub_run=None)

        # Load data: all including masks / ROI
        for mask_name in diff_mask_list:
            # force to None
            if mask_name == 'main':
                mask_name = None
            self._var_data_set[mask_name] = hidra_file.read_diffraction_variance_vector(
                mask_id=mask_name, sub_run=None)

            # fall back to Poisson errors when the file has no variances
            if self._var_data_set[mask_name] is None:
                self._var_data_set[mask_name] = numpy.sqrt(
                    self._diff_data_set[mask_name])

        print('[INFO] Loaded diffraction data from {} includes : {}'
              ''.format(self._project_file_name, self._diff_data_set.keys()))

    def _load_instrument(self, hidra_file):
        """ Load instrument setup from HIDRA file
        :param hidra_file: HIDRA project file instance
        :return:
        """
        # Check
        checkdatatypes.check_type('HIDRA project file', hidra_file, HidraProjectFile)

        # Get values
        self._instrument_setup = hidra_file.read_instrument_geometry()

    def _load_masks(self, hidra_file):
        """Load masks from Hidra project file

        Parameters
        ----------
        hidra_file : pyrs.projectfile.file_object.HidraProjectFile
            Hidra project file instance

        Returns
        -------
        None
        """
        # Check
        checkdatatypes.check_type('HIDRA project file', hidra_file, HidraProjectFile)

        # Default mask: get value and set
        default_mask = hidra_file.read_default_masks()
        if default_mask is not None:
            self.set_detector_mask(default_mask, True)

        # User specified mask: read_user_masks fills the dict in place
        mask_dict = dict()
        hidra_file.read_user_masks(mask_dict)
        for mask_name in mask_dict:
            self.set_detector_mask(mask_dict[mask_name], False, mask_name)

    def _load_sample_logs(self, hidra_file):
        """ Load sample logs.
        Note: this method can clear all the sample logs added previously. But it is not
            an issue in the real use cases.
        :param hidra_file: HIDRA project file instance
        :return:
        """
        checkdatatypes.check_type('HIDRA project file', hidra_file, HidraProjectFile)

        # overwrite the existing sample logs
        self._sample_logs = hidra_file.read_sample_logs()

    def _load_wave_length(self, hidra_file):
        """Load wave length from HidraProject file

        Parameters
        ----------
        hidra_file : pyrs.projectfile.file_object.HidraProjectFile
            Project file (instance)

        Returns
        -------
        None
        """
        checkdatatypes.check_type('HIDRA project file', hidra_file, HidraProjectFile)

        # reset the wave length (dictionary) from HIDRA project file
        self._wave_length = hidra_file.read_wavelengths()

    def get_detector_2theta(self, sub_run):
        """ Get 2theta value from sample log
        This is a special one
        :param sub_run: sub run number (integer)
        :return: float number as 2theta
        """
        checkdatatypes.check_int_variable('Sub run number', sub_run, (0, None))
        try:
            two_theta = self._sample_logs[HidraConstants.TWO_THETA, sub_run]
        except KeyError as key_err:
            raise RuntimeError(
                'Unable to retrieve 2theta value ({}) from sub run {} due to missing key {}.'
                'Available sample logs are {}'.format(
                    HidraConstants.TWO_THETA, sub_run, key_err,
                    self._sample_logs.keys()))

        return two_theta[0]  # convert from numpy array of length 1 to a scalar

    def get_l2(self, sub_run):
        """ Get L2 for a specific sub run
        :param sub_run: sub run number (integer)
        :return: L2 or None (i.e., using default L2)
        """
        checkdatatypes.check_int_variable('Sub run number', sub_run, (0, None))

        if HidraConstants.L2 in self._sample_logs:
            # L2 is a valid sample log: get L2
            try:
                # convert from numpy array of length 1 to a scalar
                l2 = self._sample_logs[HidraConstants.L2, sub_run][0]
            except KeyError as key_err:
                raise RuntimeError(
                    'Unable to retrieve L2 value for {} due to {}. Available sun runs are {}'
                    .format(sub_run, key_err,
                            self._sample_logs[HidraConstants.L2]))
        else:
            # L2 might be unchanged
            l2 = None

        return l2

    def get_instrument_setup(self):
        """ Get the handler to instrument setup
        :return:
        """
        return self._instrument_setup

    def get_detector_counts(self, sub_run):
        """Get the detector counts of a sub run (split)

        Parameters
        ----------
        sub_run : int
            sub run number

        Returns
        -------
        numpy.ndarray

        """
        checkdatatypes.check_int_variable(
            'Sub run number', sub_run, (0, None))  # consider 0 as a single sub run

        if int(sub_run) not in self._raw_counts:
            raise RuntimeError('Sub run {} does not exist in loaded raw counts. FYI loaded '
                               'sub runs are {}'.format(sub_run, self._raw_counts.keys()))

        return self._raw_counts[sub_run]

    def get_sub_runs(self):
        """Get sub runs that loaded to this workspace

        Returns
        -------
        numpy.ndarray
            1D array for sorted sub runs
        """
        if len(self._sample_logs.subruns) == 0:
            raise RuntimeError('Sub run - spectrum map has not been built')

        return self._sample_logs.subruns

    def get_wavelength(self, calibrated, throw_if_not_set, sub_run=None):
        """Get wave length

        Parameters
        ----------
        calibrated : bool
            whether the wave length is calibrated or raw
        throw_if_not_set : bool
            throw an exception if wave length is not set to workspace
        sub_run : None or int
            sub run number for the wave length associated with

        Returns
        -------
        float or dict

        """
        # Return the universal wave length if it is set
        if sub_run is None and self._wave_length is not None:
            return self._wave_length

        if calibrated:
            # calibrated wave length
            if self._wave_length_calibrated_dict is None:
                if throw_if_not_set:
                    raise RuntimeError(
                        'There is no calibrated wave length in HidraWorkspace {}'
                        .format(self._name))
                else:
                    wave_length_dict = None
            else:
                wave_length_dict = self._wave_length_calibrated_dict
        else:
            # native wave length
            if self._wave_length_dict is None:
                if throw_if_not_set:
                    raise RuntimeError(
                        'There is no original/native wave length in HidraWorkspace {}'
                        ''.format(self._name))
                else:
                    wave_length_dict = None
            else:
                wave_length_dict = self._wave_length_dict

        # Return the wave length of the sub run
        if sub_run is not None:
            return wave_length_dict[sub_run]

        return wave_length_dict

    def load_hidra_project(self, hidra_file, load_raw_counts, load_reduced_diffraction):
        """ Load HIDRA project file
        :param hidra_file: HIDRA project file instance (not file name)
        :param load_raw_counts: Flag to load raw counts
        :param load_reduced_diffraction: Flag to load reduced diffraction data
        :return:
        """
        # Check input
        checkdatatypes.check_type('HIDRA project file', hidra_file, HidraProjectFile)
        self._project_file_name = hidra_file.name
        self._project_file = hidra_file

        # create the spectrum map - must exist before loading the counts array
        self._sample_logs.subruns = hidra_file.read_sub_runs()

        # load raw detector counts and load instrument
        if load_raw_counts:
            self._load_raw_counts(hidra_file)
            self._load_instrument(hidra_file)

        # load reduced diffraction
        if load_reduced_diffraction:
            self._load_reduced_diffraction_data(hidra_file)

        # load sample logs
        self._load_sample_logs(hidra_file)

        # load masks
        self._load_masks(hidra_file)

        # load the wave length
        self._load_wave_length(hidra_file)

    def get_detector_mask(self, is_default, mask_id=None):
        """Get detector mask

        Parameters
        ----------
        is_default : bool
            If True, get the default detector mask
        mask_id : str
            with is_default is False, get the user-specified mask/ROI

        Returns
        -------
        numpy.ndarray or None
            detector mask.  None in case no default detector mask

        """
        # Default mask
        if is_default:
            return self._default_mask

        # User-specific mask
        if mask_id not in self._mask_dict:
            raise RuntimeError('Mask ID {} does not exist in HidraWorkspace {}. Available masks are '
                               '{}'.format(mask_id, self._name, self._mask_dict.keys()))

        return self._mask_dict[mask_id]

    def get_detector_shift(self):
        """ Get detector geometry shift
        :return: AnglerDetectorShift instance
        """
        return self._instrument_geometry_shift

    def get_reduced_diffraction_data_set(self, mask_id=None):
        """Get reduced diffraction data set including 2theta and intensities

        Get the full data set (matrix) of reduced diffraction pattern in 2theta unit

        Parameters
        ----------
        mask_id : str or None
            None (as default main) or ID as a String

        Returns
        -------
        ndarray, ndarray
            2theta in 2D array
            intensities in 2D array

        """
        # Check
        if mask_id is None:
            # mask_id is 'main'
            pass
        else:
            checkdatatypes.check_string_variable('Mask ID', mask_id)

        # Vector 2theta
        matrix_2theta = self._2theta_matrix.copy()

        try:
            intensity_matrix = self._diff_data_set[mask_id].copy()
        except KeyError:
            raise RuntimeError('Mask ID {} does not exist in reduced diffraction pattern. '
                               'The available masks are {}'
                               ''.format(mask_id, self._diff_data_set.keys()))

        try:
            variance_matrix = self._var_data_set[mask_id].copy()
        except KeyError:
            raise RuntimeError('Mask ID {} does not exist in reduced diffraction pattern. '
                               'The available masks are {}'
                               ''.format(mask_id, self._var_data_set.keys()))

        return matrix_2theta, intensity_matrix, variance_matrix

    def get_reduced_diffraction_data_2theta(self, sub_run):
        """Get 2theta vector of reduced diffraction data

        Parameters
        ----------
        sub_run : int
            sub run number

        Returns
        -------
        numpy.ndarray
            vector of 2theta

        """
        # Check inputs
        checkdatatypes.check_int_variable('Sub run number', sub_run, (1, None))

        # Get spectrum index
        spec_index = self._sample_logs.get_subrun_indices(sub_run)[0]

        # Vector 2theta
        vec_2theta = self._2theta_matrix[spec_index][:]

        return vec_2theta

    def get_reduced_diffraction_data(self, sub_run, mask_id=None):
        """Get data set of a single diffraction pattern

        Parameters
        ----------
        sub_run: int
            sub run number (integer)
        mask_id : str or None
            None (as default main) or ID as a String

        Returns
        -------
        numpy.ndarray, numpy.ndarray
            vector 2theta, vector intensity

        """
        # Check inputs
        # sub run number might start from 0
        checkdatatypes.check_int_variable('Sub run number', sub_run, (0, None))
        if mask_id is None:
            # mask_id = 'main'
            pass
        else:
            checkdatatypes.check_string_variable('Mask ID', mask_id)

        spec_index = self._sample_logs.get_subrun_indices(sub_run)[0]

        # Vector 2theta
        vec_2theta = self._2theta_matrix[spec_index][:]

        # Vector intensity
        try:
            vec_intensity = self._diff_data_set[mask_id][spec_index].copy()
        except KeyError:
            raise RuntimeError('Mask ID {} does not exist in reduced diffraction pattern. '
                               'The available masks are {}'
                               ''.format(mask_id, self._diff_data_set.keys()))

        return vec_2theta, vec_intensity

    def get_mask_ids(self):
        """
        Returns
        -------
        array
            list of mask ids
        """
        return list(self._diff_data_set.keys())

    def get_sample_log_names(self):
        # all registered sample log names, sorted alphabetically
        return sorted(self._sample_logs.keys())

    def get_sample_log_value(self, sample_log_name, sub_run=None):
        """
        Parameters
        ----------
        sample_log_name
        sub_run

        Returns
        -------
        float
            time-averaged sample log value for this sub run
        """
        checkdatatypes.check_string_variable('Sample log name', sample_log_name,
                                             list(self._sample_logs.keys()))

        log_value = self._sample_logs[sample_log_name, sub_run]
        # unwrap a length-1 numpy array to a scalar
        if isinstance(log_value, numpy.ndarray):
            assert log_value.shape == (1, ), 'Single log {} (= {}) is a numpy array with multiple items' \
                                             '(shape = {})'.format(sample_log_name, log_value,
                                                                   log_value.shape)
            log_value = log_value[0]

        return log_value

    def get_sample_log_values(self, sample_log_name, sub_runs=None):
        """Get ONE INDIVIDUAL sample log's values as a vector

        Exceptions
        ----------
        RuntimeError : if sample log name not in sample_log_dict

        Parameters
        ----------
        sample_log_name : str
            sample_log_name
        sub_runs : list or ndarray or None
            None for all log values, List/ndarray for selected sub runs

        Returns
        -------
        ndarray
            sample log values ordered by sub run numbers with given sub runs or all sub runs

        """
        # 'sub-runs' is served from the spectrum map when it is not a stored log
        if sample_log_name == HidraConstants.SUB_RUNS and \
                sample_log_name not in self._sample_logs.keys():
            return self.get_sub_runs()

        checkdatatypes.check_string_variable('Sample log name', sample_log_name,
                                             list(self._sample_logs.keys()))

        return self._sample_logs[sample_log_name, sub_runs]

    def get_spectrum_index(self, sub_run):
        """ Get spectrum (index) from sub run number
        :param sub_run: sub run number (integer)
        :return:
        """
        checkdatatypes.check_int_variable('Sub run number', sub_run, (0, None))

        return self._sample_logs.get_subrun_indices(sub_run)[0]

    def get_sub_runs_from_spectrum(self, spectra):
        """ Get sub runs corresponding to spectra (same as ws index)
        :param spectra: list/vector/array of spectra (workspace indexes)
        :return:
        """
        if not (isinstance(spectra, list) or isinstance(spectra, numpy.ndarray)):
            raise AssertionError('{} must be list or array'.format(
                type(spectra)))

        return self._sample_logs.subruns[spectra]

    def has_raw_data(self, sub_run):
        """ Check whether a raw file that has been loaded
        :param sub_run: sub run number (integer)
        :return:
        """
        checkdatatypes.check_int_variable('Sub run', sub_run, (1, None))

        return sub_run in self._raw_counts

    def has_sample_log(self, sample_log_name):
        """ check whether a certain sample log exists in the workspace (very likely loaded from file)
        :param sample_log_name: sample log name
        :return:
        """
        # Check inputs
        checkdatatypes.check_string_variable('Sample log name', sample_log_name)

        has_log = sample_log_name in self._sample_logs

        return has_log

    def set_instrument_geometry(self, instrument):
        # replace the instrument setup wholesale
        self._instrument_setup = instrument

    def set_detector_mask(self, mask_array, is_default, mask_id=None):
        """Set mask array to HidraWorkspace

        Record the mask to HidraWorkspace future reference

        Parameters
        ----------
        mask_array : numpy.darray
            mask bit for each pixel
        is_default : bool
            whether this mask is the default mask from beginning
        mask_id : str
            ID for mask

        Returns
        -------
        None
        """
        checkdatatypes.check_numpy_arrays('Detector mask', [mask_array], None, False)

        # Convert mask to 1D array
        if len(mask_array.shape) == 2:
            # rule out unexpected shape
            if mask_array.shape[1] != 1:
                raise RuntimeError(
                    'Mask array with shape {} is not acceptable'.format(
                        mask_array.shape))
            # convert from (N, 1) to (N,)
            num_pixels = mask_array.shape[0]
            mask_array = mask_array.reshape((num_pixels, ))
        # END-IF

        if is_default:
            self._default_mask = mask_array
        else:
            checkdatatypes.check_string_variable('Mask ID', mask_id,
                                                 allow_empty=False)
            self._mask_dict[mask_id] = mask_array

    def set_raw_counts(self, sub_run_number, counts):
        """ Set the raw counts to
        :param sub_run_number: integer for sub run number
        :param counts: ndarray of detector counts
        :return:
        """
        checkdatatypes.check_numpy_arrays('Counts', [counts], dimension=None,
                                          check_same_shape=False)

        if len(counts.shape) == 2 and counts.shape[1] == 1:
            # 1D array in 2D format: set to 1D array
            counts = counts.reshape((counts.shape[0], ))

        self._raw_counts[int(sub_run_number)] = counts

    def set_reduced_diffraction_data(self, sub_run, mask_id, two_theta_array,
                                     intensity_array, variances_array=None):
        """Set reduced diffraction data to workspace

        Parameters
        ----------
        sub_run : int
            sub run number
        mask_id : None or str
            mask ID.  None for no-mask or masked by default/universal detector masks on edges
        two_theta_array : numpy.ndarray
            2theta bins (center)
        intensity_array : numpy.ndarray
            histogrammed intensities
        variances_array : numpy.ndarray
            histogrammed variances

        Returns
        -------
        None
        """
        # Check status of reducer whether sub run number and spectrum are initialized
        if len(self._sample_logs.subruns) == 0:
            raise RuntimeError(
                'Sub run - spectrum map has not been set up yet!')

        # Check inputs
        # sub run number valid or not
        checkdatatypes.check_int_variable('Sub run number', sub_run, (1, None))
        if mask_id is not None:
            checkdatatypes.check_string_variable('Mask ID', mask_id)

        # two theta array and intensity array shall match on size
        if two_theta_array.shape != intensity_array.shape:
            raise RuntimeError(
                'Two theta array (bin centers) must have same dimension as intensity array. '
                'Now they are {} and {}'.format(two_theta_array.shape,
                                                intensity_array.shape))

        # Set 2-theta 2D array
        if self._2theta_matrix is None or len(self._2theta_matrix.shape) != 2:
            # First time set up or legacy from input file: create the 2D array
            num_sub_runs = len(self._sample_logs.subruns)
            self._2theta_matrix = numpy.ndarray(
                shape=(num_sub_runs, two_theta_array.shape[0]),
                dtype=intensity_array.dtype)

            # set the diffraction data (2D) array with new dimension
            num_sub_runs = len(self._sample_logs.subruns)
            self._diff_data_set[mask_id] = numpy.ndarray(
                shape=(num_sub_runs, intensity_array.shape[0]),
                dtype=intensity_array.dtype)

            if variances_array is None:
                variances_array = numpy.sqrt(intensity_array)
            # END-IF

            # set the diffraction data (2D) array with new dimension
            num_sub_runs = len(self._sample_logs.subruns)
            self._var_data_set[mask_id] = numpy.ndarray(
                shape=(num_sub_runs, variances_array.shape[0]),
                dtype=variances_array.dtype)

        elif mask_id not in self._diff_data_set:
            # A new mask: reset the diff_data_set again
            # NOTE(review): this branch reads variances_array.shape without the
            # None-fallback applied in the branch above — confirm callers always
            # pass variances_array when adding a new mask id
            num_sub_runs = len(self._sample_logs.subruns)
            self._diff_data_set[mask_id] = numpy.ndarray(
                shape=(num_sub_runs, intensity_array.shape[0]),
                dtype=intensity_array.dtype)

            # set the diffraction data (2D) array with new dimension
            num_sub_runs = len(self._sample_logs.subruns)
            self._var_data_set[mask_id] = numpy.ndarray(
                shape=(num_sub_runs, variances_array.shape[0]),
                dtype=variances_array.dtype)
        # END-IF

        # Get spectrum index from sub run number
        spec_id = self._sample_logs.get_subrun_indices(sub_run)[0]

        # Another sanity check on the size of 2theta and intensity
        if self._2theta_matrix.shape[1] != two_theta_array.shape[0] \
                or self._diff_data_set[mask_id].shape[1] != intensity_array.shape[0]:
            # Need to check if previously set
            raise RuntimeError(
                '2theta vector are different between parent method set {} and '
                'reduction engine returned {} OR '
                'Histogram (shape: {}) to set does not match data diffraction data set defined in '
                'worksapce (shape: {})'.format(
                    self._2theta_matrix.shape, two_theta_array.shape,
                    intensity_array.shape[0],
                    self._diff_data_set[mask_id].shape[1]))
        # END-IF-ELSE

        # Set 2theta array
        self._2theta_matrix[spec_id] = two_theta_array

        # Set intensity
        self._diff_data_set[mask_id][spec_id] = intensity_array

        # Set variances
        self._var_data_set[mask_id][spec_id] = variances_array

    def set_sample_log(self, log_name, sub_runs, log_value_array):
        """Set sample log value for each sub run, i.e., average value in each sub run

        Parameters
        ----------
        log_name : str
            sample log name
        sub_runs: ndarray
            sub runs with same shape as log_value_array
        log_value_array : ndarray
            log values

        Returns
        -------
        None
        """
        # Check inputs
        checkdatatypes.check_string_variable('Log name', log_name)
        checkdatatypes.check_numpy_arrays('Sub runs and log values',
                                          [sub_runs, log_value_array], 1, True)
        if len(self._sample_logs) > 0:
            # existing logs: the sub runs must agree with the registered ones
            self._sample_logs.matching_subruns(sub_runs)
        else:
            self._sample_logs.subruns = numpy.atleast_1d(sub_runs)

        # Set sub runs and log value to dictionary
        self._sample_logs[log_name] = numpy.atleast_1d(log_value_array)

    def set_sub_runs(self, sub_runs):
        """Set sub runs to this workspace

        Including create the sub run and spectrum map

        Parameters
        ----------
        sub_runs: list
            list of integers as sub runs

        Returns
        -------
        None
        """
        self._sample_logs.subruns = sorted(sub_runs)

    def save_experimental_data(self, hidra_project, sub_runs=None,
                               ignore_raw_counts=False):
        """Save experimental data including raw counts and sample logs to HiDRA project file

        Export (aka save) raw detector counts and sample logs from this HidraWorkspace
        to a HiDRA project file

        Exporting sub run's counts is an option

        Parameters
        ----------
        hidra_project: HidraProjectFile
            reference to a HyDra project file
        sub_runs: None or list/ndarray(1D)
            None for exporting all or the specified sub runs
        ignore_raw_counts : bool
            flag to not to export raw counts to file

        Returns
        -------
        None
        """
        # Add raw counts if it is specified to save
        if not ignore_raw_counts:
            for sub_run_i in self._raw_counts.keys():
                if sub_runs is None or sub_run_i in sub_runs:
                    hidra_project.append_raw_counts(
                        sub_run_i, self._raw_counts[sub_run_i])
                else:
                    print('[WARNING] sub run {} is not exported to {}'
                          ''.format(sub_run_i, hidra_project.name))
                # END-IF-ELSE
            # END-FOR

        # Add entry for sub runs (first)
        if sub_runs is None:
            # all sub runs
            sub_runs_array = numpy.array(sorted(self._raw_counts.keys()))
        elif isinstance(sub_runs, list):
            # convert to ndarray
            sub_runs_array = numpy.array(sub_runs)
        else:
            # same thing
            sub_runs_array = sub_runs
        hidra_project.append_experiment_log(HidraConstants.SUB_RUNS,
                                            sub_runs_array)

        # Add regular sample logs
        for log_name in self._sample_logs.keys():
            # no operation on 'sub run': skip
            if log_name == HidraConstants.SUB_RUNS:
                continue

            # Convert each sample log to a numpy array
            sample_log_value = self.get_sample_log_values(
                sample_log_name=log_name, sub_runs=sub_runs)

            # Add log value to project file
            hidra_project.append_experiment_log(log_name, sample_log_value)
        # END-FOR

        # Save default mask
        if self._default_mask is not None:
            hidra_project.write_mask_detector_array(
                HidraConstants.DEFAULT_MASK, self._default_mask)

        # Save other masks
        for mask_id in self._mask_dict:
            hidra_project.write_mask_detector_array(mask_id,
                                                    self._mask_dict[mask_id])

        # Save wave length
        self.save_wavelength(hidra_project)

    def save_wavelength(self, hidra_project):
        # only write the universal wave length when one has been set
        if self._wave_length is not None:
            hidra_project.write_wavelength(self._wave_length)

    def save_reduced_diffraction_data(self, hidra_project):
        """ Export reduced diffraction data to project
        :param hidra_project: HidraProjectFile instance
        :return:
        """
        checkdatatypes.check_type('HIDRA project file', hidra_project,
                                  HidraProjectFile)

        hidra_project.write_reduced_diffraction_data_set(
            self._2theta_matrix, self._diff_data_set, self._var_data_set)

    @property
    def sample_log_names(self):
        """ return the sample log names
        :return:
        """
        return sorted(self._sample_logs.keys())

    @property
    def sample_logs_for_plot(self):
        """ Get names of sample logs that can be plotted, i.e., the log values are
        integer or float
        """
        # NOTE(review): plottable_logs is accessed here as an attribute while the
        # SampleLogs tests call it as a method — confirm it is a property on
        # SampleLogs, otherwise sorted() would be applied to a bound method
        return sorted(self._sample_logs.plottable_logs)

    def set_wavelength(self, wave_length, calibrated):
        """ Set wave length which could be either a float (uniform) or a dictionary
        :param wave_length:
        :param calibrated: Flag for calibrated wave length
        :return:
        """
        # Set universal wave length
        self._wave_length = wave_length

        # Get the sub runs
        sub_runs = self.get_sub_runs()

        if isinstance(wave_length, float):
            # single wave length value
            wl_dict = dict()
            for sub_run in sub_runs:
                wl_dict[sub_run] = wave_length
        elif isinstance(wave_length, dict):
            # already in the dictionary format: check the sub runs
            dict_keys = sorted(wave_length.keys())
            if dict_keys != sub_runs:
                raise RuntimeError(
                    'Input wave length dictionary has different set of sub runs'
                )
            wl_dict = wave_length
        else:
            # unsupported format
            raise RuntimeError('Wave length {} in format {} is not supported.'
                               ''.format(wave_length, type(wave_length)))

        # Set to desired target
        if calibrated:
            self._wave_length_calibrated_dict = wl_dict
        else:
            self._wave_length_dict = wl_dict

    def reset_diffraction_data(self):
        """Reset the data structures to store the diffraction data set

        Returns
        -------
        None
        """
        # NOTE(review): only the 2theta matrix is cleared in the visible code;
        # _diff_data_set / _var_data_set are left untouched — confirm whether the
        # method is truncated here and should also reset those containers
        self._2theta_matrix = None
def test_write_csv():
    """End-to-end check that SummaryGenerator.write_csv produces the expected file."""
    csv_filename = 'test_write_single.csv'
    if os.path.exists(csv_filename):
        os.remove(csv_filename)

    # create a PeakCollection
    gaussian = PeakShape.GAUSSIAN
    linear = BackgroundFunction.LINEAR
    subruns = [1, 2, 3]
    data_type = [(name, numpy.float32)
                 for name in gaussian.native_parameters + linear.native_parameters]
    data = numpy.zeros(len(subruns), dtype=data_type)  # doesn't matter what the values are
    error = numpy.zeros(len(subruns), dtype=data_type)  # doesn't matter what the values are
    peaks = PeakCollection('fake', gaussian, linear)
    peaks.set_peak_fitting_values(subruns, data, error, [10., 20., 30.])

    # create a SampleLog
    sample = SampleLogs()
    sample.subruns = subruns
    sample['variable1'] = numpy.linspace(0., 100., len(subruns))
    sample['variable2'] = numpy.linspace(100., 200., len(subruns))  # not to be found in the output
    sample['constant1'] = numpy.linspace(1., 1. + 5E-11, len(subruns))  # values less than tolerance
    sample['string1'] = numpy.array(
        ['a constant string'] * sample.subruns.size)  # will be constant as well

    # write things out to disk
    generator = SummaryGenerator(
        csv_filename,
        log_list=['variable1', 'constant1', 'missing1', 'string1'])
    generator.setHeaderInformation(dict())  # means empty header
    generator.write_csv(sample, [peaks])

    assert os.path.exists(csv_filename), '{} was not created'.format(csv_filename)

    EXPECTED_HEADER = '''# IPTS number
# Run
# Scan title
# Sample name
# Item number
# HKL phase
# Strain direction
# Monochromator setting
# Calibration file
# Hidra project file
# Manual vs auto reduction
# missing: missing1
# constant1 = 1 +/- 2e-11
# string1 = a constant string'''.split('\n')

    # verify the file contents
    with open(csv_filename, 'r') as handle:
        # read in the file and remove whitespace
        contents = [line.strip() for line in handle.readlines()]

    # verify exact match on the header
    for expected, observed in zip(contents[:len(EXPECTED_HEADER)], EXPECTED_HEADER):
        assert expected == observed

    # verify the column headers
    column_header = contents[len(EXPECTED_HEADER)]
    assert column_header.startswith('sub-run,variable1,fake_dspacing_center,')
    assert column_header.endswith(',fake_chisq')

    assert len(contents) == len(EXPECTED_HEADER) + 1 + len(subruns), \
        'Does not have full body'

    # verify that the number of columns is correct
    # columns are (subruns, one log, parameter values, uncertainties, chisq)
    for line in contents[len(EXPECTED_HEADER) + 1:]:  # skip past header and constant log
        assert len(line.split(',')) == 1 + 1 + 9 * 2 + 1

    # cleanup
    os.remove(csv_filename)