def overwrite(fname):
    """If the file `fname` exists, ask the user whether to overwrite it.

    Returns
    -------
    bool
        True when the file does not exist or the user answers 'y';
        False when the user answers 'n' or interactive input is not
        available (e.g. in the IPython notebook).
    """
    if os.path.isfile(fname):
        message = "Overwrite '%s' (y/n)?\n" % fname
        try:
            answer = raw_input(message).lower()
            # Keep asking until we get an unambiguous answer.
            while answer not in ('y', 'n'):
                print('Please answer y or n.')
                answer = raw_input(message).lower()
            # The loop guarantees answer is exactly 'y' or 'n' here, so the
            # original redundant .lower() re-checks are unnecessary.
            return answer == 'y'
        except Exception:
            # BUGFIX: the original bare `except:` also swallowed
            # KeyboardInterrupt/SystemExit.  We still catch broadly because
            # the frontend-specific exception raised when raw_input is
            # unsupported (e.g. the IPython notebook) is not importable here.
            information("Your terminal does not support raw input. "
                        "Not overwriting. "
                        "To overwrite the file use `overwrite=True`")
            return False
    else:
        return True
def __init__(self, element_subshell, GOS=None):
    """EELS core-loss edge component.

    Parameters
    ----------
    element_subshell : str
        Element and subshell joined by an underscore, e.g. "Ti_L3".
    GOS : {None, "Hartree-Slater", "hydrogenic"}
        Generalized oscillator strength database to use.  When None,
        Hartree-Slater is tried first and hydrogenic is used as fallback.

    Raises
    ------
    ValueError
        If `GOS` is not one of the accepted values.
    """
    # Declare the parameters
    Component.__init__(self, ["intensity", "fine_structure_coeff",
                              "effective_angle", "onset_energy"])
    self.name = element_subshell
    self.element, self.subshell = element_subshell.split("_")
    self.energy_scale = None
    self.effective_angle.free = False
    self.fine_structure_active = preferences.EELS.fine_structure_active
    self.fine_structure_width = preferences.EELS.fine_structure_width
    self.fine_structure_coeff.ext_force_positive = False
    self.GOS = None
    # Set initial actions
    if GOS is None:
        try:
            self.GOS = HartreeSlaterGOS(element_subshell)
            GOS = "Hartree-Slater"
        except IOError:
            # Hartree-Slater tables are an optional download; fall back.
            GOS = "hydrogenic"
            # BUGFIX: the original adjacent strings concatenated to
            # "...not availableUsing hydrogenic GOS" (missing ". ").
            messages.information("Hartree-Slater GOS not available. "
                                 "Using hydrogenic GOS")
    if self.GOS is None:
        if GOS == "Hartree-Slater":
            self.GOS = HartreeSlaterGOS(element_subshell)
        elif GOS == "hydrogenic":
            self.GOS = HydrogenicGOS(element_subshell)
        else:
            raise ValueError("gos must be one of: None, 'hydrogenic'"
                             " or 'Hartree-Slater'")
    self.onset_energy.value = self.GOS.onset_energy
    self.onset_energy.free = False
    self._position = self.onset_energy
    self.free_onset_energy = False
    self.intensity.grad = self.grad_intensity
    self.intensity.value = 1
    self.intensity.bmin = 0.0
    self.intensity.bmax = None
def load_with_reader(filename, reader, record_by=None, signal=None, **kwds): from hyperspy.signals.image import Image from hyperspy.signals.spectrum import Spectrum from hyperspy.signals.eels import EELSSpectrum messages.information(reader.description) file_data_list = reader.file_reader(filename, record_by=record_by, **kwds) objects = [] for file_data_dict in file_data_list: if record_by is not None: file_data_dict['mapped_parameters']['record_by'] = record_by # The record_by can still be None if it was not defined by the reader if file_data_dict['mapped_parameters']['record_by'] is None: print "No data type provided. Defaulting to image." file_data_dict['mapped_parameters']['record_by'] = 'image' if signal is not None: file_data_dict['mapped_parameters']['signal'] = signal if file_data_dict['mapped_parameters']['record_by'] == 'image': s = Image(file_data_dict) else: if file_data_dict['mapped_parameters']['signal'] == 'EELS': s = EELSSpectrum(file_data_dict) else: s = Spectrum(file_data_dict) if defaults.plot_on_load is True: s.plot() objects.append(s) if len(objects) == 1: objects = objects[0] return objects
def load_with_reader(filename, reader, record_by = None, signal = None, **kwds): from hyperspy.signals.image import Image from hyperspy.signals.spectrum import Spectrum from hyperspy.signals.eels import EELSSpectrum messages.information(reader.description) file_data_list = reader.file_reader(filename, record_by=record_by, **kwds) objects = [] for file_data_dict in file_data_list: if record_by is not None: file_data_dict['mapped_parameters']['record_by'] = record_by # The record_by can still be None if it was not defined by the reader if file_data_dict['mapped_parameters']['record_by'] is None: print "No data type provided. Defaulting to image." file_data_dict['mapped_parameters']['record_by']= 'image' if signal is not None: file_data_dict['mapped_parameters']['signal'] = signal if file_data_dict['mapped_parameters']['record_by'] == 'image': s = Image(file_data_dict) else: if file_data_dict['mapped_parameters']['signal'] == 'EELS': s = EELSSpectrum(file_data_dict) else: s = Spectrum(file_data_dict) if defaults.plot_on_load is True: s.plot() objects.append(s) if len(objects) == 1: objects = objects[0] return objects
def plot_explained_variance_ratio(self, n=50, log=True, on_peaks=False,
                                  ax=None, label=None):
    """Plot the decomposition explained variance ratio vs index number

    Parameters
    ----------
    n : int
        Number of components
    log : bool
        If True, the y axis uses a log scale
    on_peaks : bool
        If True, use the peak-characteristics decomposition results
        instead of the raw-data ones.
    ax : matplotlib.axes instance
        The axes where to plot the figures. If None, a new figure will be
        created
    label : str
        An optional label for the legend

    Returns
    -------
    The matplotlib axes of the plot, or 0 when no explained variance
    ratio information is available.
    """
    target = self._get_target(on_peaks)
    if target.explained_variance_ratio is None:
        messages.information(
            'No explained variance ratio information available')
        return 0
    # Clip n to the number of available components.
    if n > target.explained_variance_ratio.shape[0]:
        n = target.explained_variance_ratio.shape[0]
    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111)
    ax.plot(range(n), target.explained_variance_ratio[:n], 'o', label=label)
    if log is True:
        ax.semilogy()
    ax.set_ylabel('Explained variance ratio')
    ax.set_xlabel('Principal component index')
    # BUGFIX: only draw a legend when there is a labelled artist;
    # calling plt.legend() with no labels triggers a matplotlib warning.
    if label is not None:
        plt.legend()
    plt.show()
    return ax
def __init__(self, element_subshell, GOS=None):
    """EELS core-loss edge component.

    Parameters
    ----------
    element_subshell : str or dict
        Either "Element_Subshell" (e.g. "Ti_L3") or a dict with
        'element' and 'subshell' keys (presumably produced by
        ``GOS.as_dictionary`` when restoring from file -- see the
        `_whitelist` handling below).
    GOS : {None, 'Hartree-Slater', 'hydrogenic'}
        GOS database to use; None tries Hartree-Slater first and falls
        back to hydrogenic.

    Raises
    ------
    ValueError
        If `GOS` is not one of the accepted values.
    """
    # Declare the parameters
    Component.__init__(self, [
        'intensity',
        'fine_structure_coeff',
        'effective_angle',
        'onset_energy'])
    if isinstance(element_subshell, dict):
        self.element = element_subshell['element']
        self.subshell = element_subshell['subshell']
    else:
        self.element, self.subshell = element_subshell.split('_')
    self.name = "_".join([self.element, self.subshell])
    self.energy_scale = None
    self.effective_angle.free = False
    self.fine_structure_active = preferences.EELS.fine_structure_active
    self.fine_structure_width = preferences.EELS.fine_structure_width
    self.fine_structure_coeff.ext_force_positive = False
    self.GOS = None
    # Set initial actions
    if GOS is None:
        try:
            self.GOS = HartreeSlaterGOS(element_subshell)
            GOS = 'Hartree-Slater'
        except IOError:
            # The Hartree-Slater tables are an optional download;
            # fall back to the analytic hydrogenic GOS.
            GOS = 'hydrogenic'
            messages.information('Hartree-Slater GOS not available. '
                                 'Using hydrogenic GOS')
    if self.GOS is None:
        if GOS == 'Hartree-Slater':
            self.GOS = HartreeSlaterGOS(element_subshell)
        elif GOS == 'hydrogenic':
            self.GOS = HydrogenicGOS(element_subshell)
        else:
            raise ValueError('gos must be one of: None, \'hydrogenic\''
                             ' or \'Hartree-Slater\'')
    self.onset_energy.value = self.GOS.onset_energy
    self.onset_energy.free = False
    self._position = self.onset_energy
    self.free_onset_energy = False
    self.intensity.grad = self.grad_intensity
    self.intensity.value = 1
    self.intensity.bmin = 0.
    self.intensity.bmax = None
    # Record constructor arguments so the component can be serialized
    # and re-created; Hartree-Slater stores the full GOS dictionary.
    self._whitelist['GOS'] = ('init', GOS)
    if GOS == 'Hartree-Slater':
        self._whitelist['element_subshell'] = (
            'init', self.GOS.as_dictionary(True))
    elif GOS == 'hydrogenic':
        self._whitelist['element_subshell'] = ('init', element_subshell)
    self._whitelist['fine_structure_active'] = None
    self._whitelist['fine_structure_width'] = None
    self._whitelist['fine_structure_smoothing'] = None
    # Keep the integrated GOS (and the spline knots) in sync whenever
    # the relevant parameters change.
    self.effective_angle.events.value_changed.connect(
        self._integrate_GOS, [])
    self.onset_energy.events.value_changed.connect(self._integrate_GOS, [])
    self.onset_energy.events.value_changed.connect(self._calculate_knots,
                                                   [])
def __init__(self, element_subshell, GOS=None):
    """EELS core-loss edge component.

    Parameters
    ----------
    element_subshell : str or dict
        Either "Element_Subshell" (e.g. "Ti_L3") or a dict with
        'element' and 'subshell' keys (presumably produced by
        ``GOS.as_dictionary`` when restoring from file -- see the
        `_whitelist` handling below).
    GOS : {None, 'Hartree-Slater', 'hydrogenic'}
        GOS database to use; None tries Hartree-Slater first and falls
        back to hydrogenic.

    Raises
    ------
    ValueError
        If `GOS` is not one of the accepted values.
    """
    # Declare the parameters
    Component.__init__(self, ['intensity', 'fine_structure_coeff',
                              'effective_angle', 'onset_energy'])
    if isinstance(element_subshell, dict):
        self.element = element_subshell['element']
        self.subshell = element_subshell['subshell']
    else:
        self.element, self.subshell = element_subshell.split('_')
    self.name = "_".join([self.element, self.subshell])
    self.energy_scale = None
    self.effective_angle.free = False
    self.fine_structure_active = preferences.EELS.fine_structure_active
    self.fine_structure_width = preferences.EELS.fine_structure_width
    self.fine_structure_coeff.ext_force_positive = False
    self.GOS = None
    # Set initial actions
    if GOS is None:
        try:
            self.GOS = HartreeSlaterGOS(element_subshell)
            GOS = 'Hartree-Slater'
        except IOError:
            # The Hartree-Slater tables are an optional download;
            # fall back to the analytic hydrogenic GOS.
            GOS = 'hydrogenic'
            messages.information(
                'Hartree-Slater GOS not available. '
                'Using hydrogenic GOS')
    if self.GOS is None:
        if GOS == 'Hartree-Slater':
            self.GOS = HartreeSlaterGOS(element_subshell)
        elif GOS == 'hydrogenic':
            self.GOS = HydrogenicGOS(element_subshell)
        else:
            raise ValueError(
                'gos must be one of: None, \'hydrogenic\''
                ' or \'Hartree-Slater\'')
    self.onset_energy.value = self.GOS.onset_energy
    self.onset_energy.free = False
    self._position = self.onset_energy
    self.free_onset_energy = False
    self.intensity.grad = self.grad_intensity
    self.intensity.value = 1
    self.intensity.bmin = 0.
    self.intensity.bmax = None
    # Record constructor arguments so the component can be serialized
    # and re-created; Hartree-Slater stores the full GOS dictionary.
    self._whitelist['GOS'] = ('init', GOS)
    if GOS == 'Hartree-Slater':
        self._whitelist['element_subshell'] = (
            'init', self.GOS.as_dictionary(True))
    elif GOS == 'hydrogenic':
        self._whitelist['element_subshell'] = ('init', element_subshell)
    self._whitelist['fine_structure_active'] = None
    self._whitelist['fine_structure_width'] = None
    self._whitelist['fine_structure_smoothing'] = None
def normalize_poissonian_noise(self, navigation_mask=None, signal_mask=None): """ Scales the SI following Surf. Interface Anal. 2004; 36: 203–212 to "normalize" the poissonian data for decomposition analysis Parameters ---------- navigation_mask : boolen numpy array signal_mask : boolen numpy array """ messages.information( "Scaling the data to normalize the (presumably)" " Poissonian noise") refold = self.unfold_if_multidim() # The rest of the code assumes that the first data axis # is the navigation axis. We transpose the data if that is not the # case. dc = (self.data if self.axes_manager[0].index_in_array == 0 else self.data.T) if navigation_mask is None: navigation_mask = slice(None) else: navigation_mask = ~navigation_mask.ravel() if signal_mask is None: signal_mask = slice(None) else: signal_mask = ~signal_mask # Rescale the data to gaussianize the poissonian noise aG = dc[:, signal_mask][navigation_mask, :].sum(1).squeeze() bH = dc[:, signal_mask][navigation_mask, :].sum(0).squeeze() # Checks if any is negative if (aG < 0).any() or (bH < 0).any(): raise ValueError( "Data error: negative values\n" "Are you sure that the data follow a poissonian " "distribution?") self._root_aG = np.sqrt(aG)[:, np.newaxis] self._root_bH = np.sqrt(bH)[np.newaxis, :] # We first disable numpy's warning when the result of an # operation produces nans np.seterr(invalid='ignore') dc[:, signal_mask][navigation_mask, :] /= (self._root_aG * self._root_bH) # Enable numpy warning np.seterr(invalid=None) # Set the nans resulting from 0/0 to zero dc[:, signal_mask][navigation_mask, :] = \ np.nan_to_num(dc[:, signal_mask][navigation_mask, :]) if refold is True: print "Automatically refolding the SI after scaling" self.fold()
def normalize_poissonian_noise(self, navigation_mask=None,
                               signal_mask=None):
    """ Scales the SI following Surf. Interface Anal. 2004; 36: 203-212
    to "normalize" the poissonian data for decomposition analysis

    Parameters
    ----------
    navigation_mask : boolen numpy array
        True marks navigation positions to exclude (inverted below).
    signal_mask : boolen numpy array
        True marks signal channels to exclude (inverted below).
    """
    messages.information(
        "Scaling the data to normalize the (presumably)"
        " Poissonian noise")
    # NOTE(review): other revisions of this method call
    # unfold_if_multidim() here; confirm that unfold() also returns a
    # flag suitable for the `refold` check below.
    refold = self.unfold()
    # The rest of the code assumes that the first data axis
    # is the navigation axis. We transpose the data if that is not the
    # case.
    dc = (self.data if self.axes_manager[0].index_in_array == 0
          else self.data.T)
    if navigation_mask is None:
        navigation_mask = slice(None)
    else:
        navigation_mask = ~navigation_mask.ravel()
    if signal_mask is None:
        signal_mask = slice(None)
    else:
        signal_mask = ~signal_mask
    # Rescale the data to gaussianize the poissonian noise
    aG = dc[:, signal_mask][navigation_mask, :].sum(1).squeeze()
    bH = dc[:, signal_mask][navigation_mask, :].sum(0).squeeze()
    # Checks if any is negative
    if (aG < 0).any() or (bH < 0).any():
        raise ValueError(
            "Data error: negative values\n"
            "Are you sure that the data follow a poissonian "
            "distribution?")
    self._root_aG = np.sqrt(aG)[:, np.newaxis]
    self._root_bH = np.sqrt(bH)[np.newaxis, :]
    # We first disable numpy's warning when the result of an
    # operation produces nans
    np.seterr(invalid='ignore')
    # NOTE(review): when the masks are boolean arrays the chained fancy
    # indexing returns a copy, so this in-place division may not write
    # back into dc -- confirm masked use is supported.
    dc[:, signal_mask][navigation_mask, :] /= (self._root_aG *
                                               self._root_bH)
    # Enable numpy warning
    # NOTE(review): seterr(invalid=None) leaves the setting unchanged,
    # so this does NOT actually re-enable the warnings.
    np.seterr(invalid=None)
    # Set the nans resulting from 0/0 to zero
    dc[:, signal_mask][navigation_mask, :] = \
        np.nan_to_num(dc[:, signal_mask][navigation_mask, :])
    if refold is True:
        print "Automatically refolding the SI after scaling"
        self.fold()
def unfold_signal_space(self):
    """Modify the shape of the data to obtain a signal space of dimension 1
    """
    # Nothing to do for an already one-dimensional signal space.
    if self.axes_manager.signal_dimension < 2:
        messages.information('Nothing done, the signal dimension was '
                             'already 1')
        return False
    # Axes that keep their identity during the unfolding.
    steady_axes = []
    for nav_axis in self.axes_manager._non_slicing_axes:
        steady_axes.append(nav_axis.index_in_array)
    # All signal axes collapse onto the last slicing axis.
    unfolded_axis = self.axes_manager._slicing_axes[-1].index_in_array
    self._unfold(steady_axes, unfolded_axis)
def unfold_signal_space(self):
    """Modify the shape of the data to obtain a signal space of dimension 1
    """
    # Guard clause: a 0- or 1-D signal space needs no unfolding.
    if self.axes_manager.signal_dimension < 2:
        messages.information('Nothing done, the signal dimension was '
                             'already 1')
        return False
    manager = self.axes_manager
    # Navigation axes are untouched; signal axes merge into the last one.
    steady_axes = [a.index_in_array for a in manager._non_slicing_axes]
    unfolded_axis = manager._slicing_axes[-1].index_in_array
    self._unfold(steady_axes, unfolded_axis)
def __init__(self, element_subshell, GOS=None):
    """EELS core-loss edge component.

    Parameters
    ----------
    element_subshell : str or dict
        Either "Element_Subshell" (e.g. "Ti_L3") or a dict with
        'element' and 'subshell' keys (presumably produced by
        ``GOS.as_dictionary`` when restoring from file -- see the
        `_whitelist` handling below).
    GOS : {None, "Hartree-Slater", "hydrogenic"}
        GOS database to use; None tries Hartree-Slater first and falls
        back to hydrogenic.

    Raises
    ------
    ValueError
        If `GOS` is not one of the accepted values.
    """
    # Declare the parameters
    Component.__init__(self, ["intensity", "fine_structure_coeff",
                              "effective_angle", "onset_energy"])
    if isinstance(element_subshell, dict):
        self.element = element_subshell["element"]
        self.subshell = element_subshell["subshell"]
    else:
        self.element, self.subshell = element_subshell.split("_")
    self.name = "_".join([self.element, self.subshell])
    self.energy_scale = None
    self.effective_angle.free = False
    self.fine_structure_active = preferences.EELS.fine_structure_active
    self.fine_structure_width = preferences.EELS.fine_structure_width
    self.fine_structure_coeff.ext_force_positive = False
    self.GOS = None
    # Set initial actions
    if GOS is None:
        try:
            self.GOS = HartreeSlaterGOS(element_subshell)
            GOS = "Hartree-Slater"
        except IOError:
            # The Hartree-Slater tables are an optional download;
            # fall back to the analytic hydrogenic GOS.
            GOS = "hydrogenic"
            messages.information("Hartree-Slater GOS not available. "
                                 "Using hydrogenic GOS")
    if self.GOS is None:
        if GOS == "Hartree-Slater":
            self.GOS = HartreeSlaterGOS(element_subshell)
        elif GOS == "hydrogenic":
            self.GOS = HydrogenicGOS(element_subshell)
        else:
            raise ValueError("gos must be one of: None, 'hydrogenic'"
                             " or 'Hartree-Slater'")
    self.onset_energy.value = self.GOS.onset_energy
    self.onset_energy.free = False
    self._position = self.onset_energy
    self.free_onset_energy = False
    self.intensity.grad = self.grad_intensity
    self.intensity.value = 1
    self.intensity.bmin = 0.0
    self.intensity.bmax = None
    # Record constructor arguments so the component can be serialized
    # and re-created; Hartree-Slater stores the full GOS dictionary.
    self._whitelist["GOS"] = ("init", GOS)
    if GOS == "Hartree-Slater":
        self._whitelist["element_subshell"] = ("init",
                                               self.GOS.as_dictionary(True))
    elif GOS == "hydrogenic":
        self._whitelist["element_subshell"] = ("init", element_subshell)
    self._whitelist["fine_structure_active"] = None
    self._whitelist["fine_structure_width"] = None
    self._whitelist["fine_structure_smoothing"] = None
    # Keep the integrated GOS (and the spline knots) in sync whenever
    # the relevant parameters change.
    self.effective_angle.events.value_changed.connect(self._integrate_GOS,
                                                      [])
    self.onset_energy.events.value_changed.connect(self._integrate_GOS, [])
    self.onset_energy.events.value_changed.connect(self._calculate_knots,
                                                   [])
def load_with_reader(filename, reader, record_by=None, signal_type=None,
                     output_level=1, **kwds):
    """Load `filename` using `reader` and wrap each dataset in a signal,
    recording the source file location in ``tmp_parameters``.

    Parameters
    ----------
    filename : str
    reader : module
        A file-reader plugin exposing ``file_reader``.
    record_by : {None, 'image', 'spectrum'}
        Overrides the record_by reported by the reader.
    signal_type : str or None
        Overrides the signal type reported by the reader, e.g. 'EELS'.
    output_level : int
        When > 1, emit informational messages while loading.
    **kwds
        Forwarded to ``reader.file_reader``.

    Returns
    -------
    A single signal instance, or a list of them when the file contains
    several datasets.
    """
    from hyperspy.signals.image import Image
    from hyperspy.signals.spectrum import Spectrum
    from hyperspy.signals.eels import EELSSpectrum
    if output_level > 1:
        messages.information('Loading %s ...' % filename)
    file_data_list = reader.file_reader(filename,
                                        record_by=record_by,
                                        output_level=output_level,
                                        **kwds)
    objects = []
    for file_data_dict in file_data_list:
        if record_by is not None:
            file_data_dict['mapped_parameters']['record_by'] = record_by
        # The record_by can still be None if it was not defined by the reader
        if file_data_dict['mapped_parameters']['record_by'] is None:
            print "No data type provided. Defaulting to image."
            file_data_dict['mapped_parameters']['record_by'] = 'image'
        if signal_type is not None:
            file_data_dict['mapped_parameters']['signal_type'] = signal_type
        if file_data_dict['mapped_parameters']['record_by'] == 'image':
            s = Image(file_data_dict)
        else:
            if ('signal_type' in file_data_dict['mapped_parameters'] and
                    file_data_dict['mapped_parameters']['signal_type']
                    == 'EELS'):
                s = EELSSpectrum(file_data_dict)
            else:
                s = Spectrum(file_data_dict)
        # NOTE(review): this reassigns the `filename` parameter inside the
        # loop, so the final "%s correctly loaded" message below prints the
        # extension-less basename rather than the original path -- confirm
        # that is intended.
        folder, filename = os.path.split(os.path.abspath(filename))
        filename, extension = os.path.splitext(filename)
        s.tmp_parameters.folder = folder
        s.tmp_parameters.filename = filename
        s.tmp_parameters.extension = extension.replace('.', '')
        objects.append(s)
        s.print_summary()
    # Unwrap single-dataset files for convenience.
    if len(objects) == 1:
        objects = objects[0]
    if output_level > 1:
        messages.information('%s correctly loaded' % filename)
    return objects
def multifit(self, mask=None, fitter=None,
             charge_only_fixed=False, grad=False, autosave=False,
             autosave_every=10, bounded=False, **kwargs):
    """Fit the model at every position of the navigation space.

    Parameters
    ----------
    mask : boolean numpy array or None
        Positions where the mask is True are skipped.  Must have the
        navigation shape.
    fitter : str or None
        Optimizer name; defaults to ``preferences.Model.default_fitter``.
    charge_only_fixed : bool
        Forwarded to ``self.charge``.
    grad : bool
        Forwarded to ``self.fit``.
    autosave : bool
        When True, periodically save the parameters to a temporary
        .npz file (deleted when finished).
    autosave_every : int
        Save every this many fitted pixels.
    bounded : bool
        Use bounded optimization when the fitter supports it
        (mpfit, tnc, l_bfgs_b).
    **kwargs
        Forwarded to ``self.fit``.
    """
    if fitter is None:
        fitter = preferences.Model.default_fitter
    print('Fitter: %s' % fitter)
    if autosave is not False:
        fd, autosave_fn = tempfile.mkstemp(prefix='hyperspy_autosave-',
                                           dir='.', suffix='.npz')
        os.close(fd)
        # Strip '.npz'; save_parameters2file appends it again.
        autosave_fn = autosave_fn[:-4]
        messages.information(
            "Autosaving each %s pixels to %s.npz" % (autosave_every,
                                                     autosave_fn))
        messages.information(
            "When multifit finishes its job the file will be deleted")
    if mask is not None and \
            (mask.shape != tuple(self.axes_manager.navigation_shape)):
        # BUGFIX: the original message read "...same espatial dimensions
        # as thenavigation shape..." (typo and missing space).
        messages.warning_exit(
            "The mask must be an array with the same spatial dimensions "
            "as the navigation shape, %s" %
            self.axes_manager.navigation_shape)
    masked_elements = 0 if mask is None else mask.sum()
    pbar = progressbar.progressbar(
        maxval=(np.cumprod(self.axes_manager.navigation_shape)[-1] -
                masked_elements))
    if bounded is True:
        if fitter == 'mpfit':
            self.set_mpfit_parameters_info()
            bounded = None
        elif fitter in ("tnc", "l_bfgs_b"):
            self.set_boundaries()
            bounded = None
        else:
            # BUGFIX: typos ("suppport", "boundinig") and missing spaces
            # between the concatenated message fragments.
            messages.information(
                "The chosen fitter does not support bounding. "
                "If you require bounding please select one of the "
                "following fitters instead: mpfit, tnc, l_bfgs_b")
            bounded = False
    i = 0
    for index in np.ndindex(tuple(self.axes_manager.navigation_shape)):
        if mask is None or not mask[index]:
            self.axes_manager.set_not_slicing_indexes(index)
            self.charge(only_fixed=charge_only_fixed)
            self.fit(fitter=fitter, grad=grad, bounded=bounded, **kwargs)
            i += 1
            pbar.update(i)
            if autosave is True and i % autosave_every == 0:
                self.save_parameters2file(autosave_fn)
    pbar.finish()
    if autosave is True:
        # BUGFIX: the original message was "...file %s pixels" with the
        # filename missing its '.' before "npz".
        messages.information(
            'Deleting the temporary file %s' % (autosave_fn + '.npz'))
        os.remove(autosave_fn + '.npz')
def __init__(self, element_subshell, GOS=None):
    """EELS core-loss edge component.

    Parameters
    ----------
    element_subshell : str
        Element and subshell joined by an underscore, e.g. "Ti_L3".
    GOS : {None, 'Hartree-Slater', 'hydrogenic'}
        Generalized oscillator strength database to use.  When None,
        Hartree-Slater is tried first and hydrogenic is used as fallback.

    Raises
    ------
    ValueError
        If `GOS` is not one of the accepted values.
    """
    # Declare the parameters
    Component.__init__(self, ['intensity', 'fine_structure_coeff',
                              'effective_angle', 'onset_energy'])
    self.name = element_subshell
    self.element, self.subshell = element_subshell.split('_')
    self.energy_scale = None
    self.effective_angle.free = False
    self.fine_structure_active = preferences.EELS.fine_structure_active
    self.fine_structure_width = preferences.EELS.fine_structure_width
    self.fine_structure_coeff.ext_force_positive = False
    self.GOS = None
    # Set initial actions
    if GOS is None:
        try:
            self.GOS = HartreeSlaterGOS(element_subshell)
            GOS = 'Hartree-Slater'
        except IOError:
            # Hartree-Slater tables are an optional download; fall back.
            GOS = 'hydrogenic'
            # BUGFIX: the original adjacent strings concatenated to
            # "...not availableUsing hydrogenic GOS" (missing ". ").
            messages.information(
                'Hartree-Slater GOS not available. '
                'Using hydrogenic GOS')
    if self.GOS is None:
        if GOS == 'Hartree-Slater':
            self.GOS = HartreeSlaterGOS(element_subshell)
        elif GOS == 'hydrogenic':
            self.GOS = HydrogenicGOS(element_subshell)
        else:
            raise ValueError(
                'gos must be one of: None, \'hydrogenic\''
                ' or \'Hartree-Slater\'')
    self.onset_energy.value = self.GOS.onset_energy
    self.onset_energy.free = False
    self._position = self.onset_energy
    self.free_onset_energy = False
    self.intensity.grad = self.grad_intensity
    self.intensity.value = 1
    self.intensity.bmin = 0.
    self.intensity.bmax = None
def plot_explained_variance_ratio(self, n=50, log=True, ax=None,
                                  label=None):
    """Plot the decomposition explained variance ratio vs index number

    Parameters
    ----------
    n : int
        Number of components
    log : bool
        If True, the y axis uses a log scale
    ax : matplotlib.axes instance
        The axes where to plot the figures. If None, a new figure will be
        created
    label : str
        An optional label for the legend

    Returns
    -------
    The axe of the plot, that can be passed to the method again in a
    future call using the ax attribute
    """
    target = self.learning_results
    if target.explained_variance_ratio is None:
        messages.information(
            'No explained variance ratio information available')
        return 0
    # Clip n to the number of available components.
    if n > target.explained_variance_ratio.shape[0]:
        n = target.explained_variance_ratio.shape[0]
    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111)
    ax.plot(range(n), target.explained_variance_ratio[:n], 'o', label=label)
    if log is True:
        ax.semilogy()
    ax.set_ylabel('Explained variance ratio')
    ax.set_xlabel('Principal component index')
    # BUGFIX: only draw a legend when there is a labelled artist;
    # calling plt.legend() with no labels triggers a matplotlib warning.
    if label is not None:
        plt.legend()
    plt.show()
    return ax
def load_with_reader(filename, reader, record_by=None, signal_type=None,
                     output_level=1, **kwds):
    """Load `filename` using `reader` and wrap each dataset in a signal.

    Parameters
    ----------
    filename : str
    reader : module
        A file-reader plugin exposing ``file_reader``.
    record_by : {None, 'image', 'spectrum'}
        Overrides the record_by reported by the reader.
    signal_type : str or None
        Overrides the signal type reported by the reader, e.g. 'EELS'.
    output_level : int
        When > 1, emit informational messages while loading.
    **kwds
        Forwarded to ``reader.file_reader``.

    Returns
    -------
    A single signal instance, or a list of them when the file contains
    several datasets.
    """
    from hyperspy.signals.image import Image
    from hyperspy.signals.spectrum import Spectrum
    from hyperspy.signals.eels import EELSSpectrum
    if output_level > 1:
        messages.information('Loading %s ...' % filename)
    file_data_list = reader.file_reader(filename,
                                        record_by=record_by,
                                        output_level=output_level,
                                        **kwds)
    objects = []
    for file_data_dict in file_data_list:
        if record_by is not None:
            file_data_dict['mapped_parameters']['record_by'] = record_by
        # The record_by can still be None if it was not defined by the reader
        if file_data_dict['mapped_parameters']['record_by'] is None:
            print "No data type provided. Defaulting to image."
            file_data_dict['mapped_parameters']['record_by'] = 'image'
        if signal_type is not None:
            file_data_dict['mapped_parameters']['signal_type'] = signal_type
        if file_data_dict['mapped_parameters']['record_by'] == 'image':
            s = Image(file_data_dict)
        else:
            # 'signal_type' may be absent, hence the membership check.
            if 'signal_type' in file_data_dict['mapped_parameters'] and \
                    file_data_dict['mapped_parameters']['signal_type'] \
                    == 'EELS':
                s = EELSSpectrum(file_data_dict)
            else:
                s = Spectrum(file_data_dict)
        objects.append(s)
        print s
    # Unwrap single-dataset files for convenience.
    if len(objects) == 1:
        objects = objects[0]
    if output_level > 1:
        messages.information('%s correctly loaded' % filename)
    return objects
def load_with_reader(filename, reader, record_by=None, signal_type=None,
                     output_level=1, is_agg=False, **kwds):
    """Load `filename` using `reader` and wrap each dataset in a signal.

    Parameters
    ----------
    filename : str
    reader : module
        A file-reader plugin exposing ``file_reader`` and ``description``.
    record_by : {None, 'image', 'spectrum'}
        Overrides the record_by reported by the reader.
    signal_type : str or None
        Overrides the signal type reported by the reader, e.g. 'EELS'.
    output_level : int
        When > 1, emit informational messages while loading.
    is_agg : bool
        When True, suppress the automatic plot-on-load behaviour
        (presumably set when loading as part of an aggregate; confirm
        against callers).
    **kwds
        Forwarded to ``reader.file_reader``.

    Returns
    -------
    A single signal instance, or a list of them when the file contains
    several datasets.
    """
    from hyperspy.signals.image import Image
    from hyperspy.signals.spectrum import Spectrum
    from hyperspy.signals.eels import EELSSpectrum
    if output_level > 1:
        messages.information(reader.description)
    file_data_list = reader.file_reader(filename,
                                        record_by=record_by,
                                        output_level=output_level,
                                        **kwds)
    objects = []
    for file_data_dict in file_data_list:
        if record_by is not None:
            file_data_dict['mapped_parameters']['record_by'] = record_by
        # The record_by can still be None if it was not defined by the reader
        if file_data_dict['mapped_parameters']['record_by'] is None:
            print "No data type provided. Defaulting to image."
            file_data_dict['mapped_parameters']['record_by'] = 'image'
        if signal_type is not None:
            file_data_dict['mapped_parameters']['signal_type'] = signal_type
        if file_data_dict['mapped_parameters']['record_by'] == 'image':
            s = Image(file_data_dict)
        else:
            # 'signal_type' may be absent, hence the membership check.
            if 'signal_type' in file_data_dict['mapped_parameters'] and \
                    file_data_dict['mapped_parameters']['signal_type'] \
                    == 'EELS':
                s = EELSSpectrum(file_data_dict)
            else:
                s = Spectrum(file_data_dict)
        objects.append(s)
        print s
        # Respect the user preference unless loading as an aggregate.
        if hyperspy.defaults_parser.preferences.General.plot_on_load is True \
                and is_agg is False:
            s.plot()
    # Unwrap single-dataset files for convenience.
    if len(objects) == 1:
        objects = objects[0]
    return objects
def plot_explained_variance_ratio(self, n=50, log=True, ax=None, label=None):
    """Plot the decomposition explained variance ratio vs index number

    Parameters
    ----------
    n : int
        Number of components
    log : bool
        If True, the y axis uses a log scale
    ax : matplotlib.axes instance
        The axes where to plot the figures. If None, a new figure will be
        created
    label : str
        An optional label for the legend

    Returns
    -------
    The axe of the plot, that can be passed to the method again in a
    future call using the ax attribute
    """
    target = self.learning_results
    if target.explained_variance_ratio is None:
        messages.information("No explained variance ratio information available")
        return 0
    # Clip n to the number of available components.
    if n > target.explained_variance_ratio.shape[0]:
        n = target.explained_variance_ratio.shape[0]
    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111)
    ax.plot(range(n), target.explained_variance_ratio[:n], "o", label=label)
    if log is True:
        ax.semilogy()
    ax.set_ylabel("Explained variance ratio")
    ax.set_xlabel("Principal component index")
    # BUGFIX: only draw a legend when there is a labelled artist;
    # calling plt.legend() with no labels triggers a matplotlib warning.
    if label is not None:
        plt.legend()
    plt.show()
    return ax
def multifit(self, mask=None, fitter="leastsq", charge_only_fixed=False,
             grad=False, autosave=False, autosave_every=10, **kwargs):
    """Fit the model at every position of the navigation space.

    Parameters
    ----------
    mask : boolean numpy array or None
        Positions where the mask is True are skipped.  Must have the
        navigation shape.
    fitter : str
        Optimizer name, by default "leastsq".
    charge_only_fixed : bool
        Forwarded to ``self.charge``.
    grad : bool
        Forwarded to ``self.fit``.
    autosave : bool
        When True, periodically save the parameters to a temporary
        .npz file (deleted when finished).
    autosave_every : int
        Save every this many fitted pixels.
    **kwargs
        Forwarded to ``self.fit``.
    """
    if autosave is not False:
        fd, autosave_fn = tempfile.mkstemp(prefix='hyperspy_autosave-',
                                           dir='.', suffix='.npz')
        os.close(fd)
        # Strip '.npz'; save_parameters2file appends it again.
        autosave_fn = autosave_fn[:-4]
        messages.information("Autosaving each %s pixels to %s.npz" %
                             (autosave_every, autosave_fn))
        messages.information(
            "When multifit finishes its job the file will be deleted")
    if mask is not None and \
            (mask.shape != tuple(self.axes_manager.navigation_shape)):
        # BUGFIX: the original message read "...same espatial dimensions
        # as thenavigation shape..." (typo and missing space).
        messages.warning_exit(
            "The mask must be an array with the same spatial dimensions "
            "as the navigation shape, %s" %
            self.axes_manager.navigation_shape)
    masked_elements = 0 if mask is None else mask.sum()
    pbar = progressbar.progressbar(
        maxval=(np.cumprod(self.axes_manager.navigation_shape)[-1] -
                masked_elements))
    i = 0
    for index in np.ndindex(tuple(self.axes_manager.navigation_shape)):
        if mask is None or not mask[index]:
            self.axes_manager.set_not_slicing_indexes(index)
            self.charge(only_fixed=charge_only_fixed)
            self.fit(fitter=fitter, grad=grad, **kwargs)
            i += 1
            pbar.update(i)
            if autosave is True and i % autosave_every == 0:
                self.save_parameters2file(autosave_fn)
    pbar.finish()
    if autosave is True:
        # BUGFIX: the original message was "...file %s pixels" with the
        # filename missing its '.' before "npz".
        messages.information('Deleting the temporary file %s' %
                             (autosave_fn + '.npz'))
        os.remove(autosave_fn + '.npz')
def multifit(self, mask=None, fitter="leastsq",
             charge_only_fixed=False, grad=False, autosave=False,
             autosave_every=10, **kwargs):
    """Fit the model at every position of the navigation space.

    Parameters
    ----------
    mask : boolean numpy array or None
        Positions where the mask is True are skipped.  Must have the
        navigation shape.
    fitter : str
        Optimizer name, by default "leastsq".
    charge_only_fixed : bool
        Forwarded to ``self.charge``.
    grad : bool
        Forwarded to ``self.fit``.
    autosave : bool
        When True, periodically save the parameters to a temporary
        .npz file (deleted when finished).
    autosave_every : int
        Save every this many fitted pixels.
    **kwargs
        Forwarded to ``self.fit``.
    """
    if autosave is not False:
        fd, autosave_fn = tempfile.mkstemp(prefix='hyperspy_autosave-',
                                           dir='.', suffix='.npz')
        os.close(fd)
        # Strip '.npz'; save_parameters2file appends it again.
        autosave_fn = autosave_fn[:-4]
        messages.information(
            "Autosaving each %s pixels to %s.npz" % (autosave_every,
                                                     autosave_fn))
        messages.information(
            "When multifit finishes its job the file will be deleted")
    if mask is not None and \
            (mask.shape != tuple(self.axes_manager.navigation_shape)):
        # BUGFIX: the original message read "...same espatial dimensions
        # as thenavigation shape..." (typo and missing space).
        messages.warning_exit(
            "The mask must be an array with the same spatial dimensions "
            "as the navigation shape, %s" %
            self.axes_manager.navigation_shape)
    masked_elements = 0 if mask is None else mask.sum()
    pbar = progressbar.progressbar(
        maxval=(np.cumprod(self.axes_manager.navigation_shape)[-1] -
                masked_elements))
    i = 0
    for index in np.ndindex(tuple(self.axes_manager.navigation_shape)):
        if mask is None or not mask[index]:
            self.axes_manager.set_not_slicing_indexes(index)
            self.charge(only_fixed=charge_only_fixed)
            self.fit(fitter=fitter, grad=grad, **kwargs)
            i += 1
            pbar.update(i)
            if autosave is True and i % autosave_every == 0:
                self.save_parameters2file(autosave_fn)
    pbar.finish()
    if autosave is True:
        # BUGFIX: the original message was "...file %s pixels" with the
        # filename missing its '.' before "npz".
        messages.information(
            'Deleting the temporary file %s' % (autosave_fn + '.npz'))
        os.remove(autosave_fn + '.npz')
def normalize_poissonian_noise(self, navigation_mask=None,
                               signal_mask=None, return_masks=False):
    """ Scales the SI following Surf. Interface Anal. 2004; 36: 203-212
    to "normalize" the poissonian data for decomposition analysis

    Parameters
    ----------
    navigation_mask : boolen numpy array
        True marks the navigation positions to INCLUDE (note: unlike
        other revisions of this method, the mask is NOT inverted here).
    signal_mask : boolen numpy array
        True marks the signal channels to include.
    return_masks : bool
        When True, return the (possibly updated) masks so callers can
        reuse them after zero-sum rows/columns have been masked out.
    """
    messages.information(
        "Scaling the data to normalize the (presumably) Poissonian noise")
    refold = self.unfold_if_multidim()
    dc = self.data
    if navigation_mask is None:
        navigation_mask = slice(None)
    else:
        navigation_mask = navigation_mask.ravel()
    if signal_mask is None:
        signal_mask = slice(None)
    # Rescale the data to gaussianize the poissonian noise
    aG = dc[:, signal_mask][navigation_mask, :].sum(1).squeeze()
    bH = dc[:, signal_mask][navigation_mask, :].sum(0).squeeze()
    # Checks if any is negative
    if (aG < 0).any() or (bH < 0).any():
        messages.warning_exit(
            "Data error: negative values\n"
            "Are you sure that the data follow a poissonian distribution?")
    # Update the spatial and energy masks so it does not include rows
    # or colums that sum zero.
    aG0 = (aG == 0)
    bH0 = (bH == 0)
    if aG0.any():
        if isinstance(navigation_mask, slice):
            # Convert the slice into a mask before setting its values
            navigation_mask = np.ones((self.data.shape[0]), dtype='bool')
        # Set colums summing zero as masked
        # NOTE(review): when navigation_mask was passed as an array, aG0
        # was computed over the masked subset, so its length may not
        # match navigation_mask here -- confirm masked use is supported.
        navigation_mask[aG0] = False
        aG = aG[aG0 == False]
    if bH0.any():
        if isinstance(signal_mask, slice):
            # Convert the slice into a mask before setting its values
            signal_mask = np.ones((self.data.shape[1]), dtype='bool')
        # Set rows summing zero as masked
        signal_mask[bH0] = False
        bH = bH[bH0 == False]
    self._root_aG = np.sqrt(aG)[:, np.newaxis]
    self._root_bH = np.sqrt(bH)[np.newaxis, :]
    # NOTE(review): with boolean-array masks the chained fancy indexing
    # on the left-hand side yields a copy, so this assignment may not
    # write back into dc -- confirm against callers.
    dc[:, signal_mask][navigation_mask, :] = \
        (dc[:, signal_mask][navigation_mask, :] /
            (self._root_aG * self._root_bH))
    if refold is True:
        print "Automatically refolding the SI after scaling"
        self.fold()
    if return_masks is True:
        # Report None instead of an untouched full slice.
        if isinstance(navigation_mask, slice):
            navigation_mask = None
        if isinstance(signal_mask, slice):
            signal_mask = None
        return navigation_mask, signal_mask
program_files = os.environ['PROGRAMFILES(X86)'] gos_path = os.path.join(program_files, gos) if os.path.isdir(gos_path) is False: gos_path = os.path.join(config_path, 'EELS_GOS') else: gos_path = os.path.join(config_path, 'EELS_GOS') return gos_path if os.path.isfile(defaults_file): # Remove config file if obsolated f = open(defaults_file) if 'Not really' in f.readline(): # It is the old config file f.close() messages.information('Removing obsoleted config file') os.remove(defaults_file) defaults_file_exists = False else: defaults_file_exists = True else: defaults_file_exists = False # Defaults template definition starts##################################### # This "section" is all that has to be modified to add or remove sections and # options from the defaults class GeneralConfig(t.HasTraits): default_file_format = t.Enum('hdf5', 'rpl', desc='Using the hdf5 format is highly reccomended because is the '
def principal_components_analysis(self, normalize_poissonian_noise=False, algorithm='svd', output_dimension=None, navigation_mask=None, signal_mask=None, center=False, variance2one=False, var_array=None, var_func=None, polyfit=None, on_peaks=False): """Principal components analysis. The results are stored in self.mva_results Parameters ---------- normalize_poissonian_noise : bool If True, scale the SI to normalize Poissonian noise algorithm : {'svd', 'fast_svd', 'mlpca', 'fast_mlpca', 'mdp', 'NIPALS'} output_dimension : None or int number of PCA to keep navigation_mask : boolean numpy array signal_mask : boolean numpy array center : bool Perform energy centering before PCA variance2one : bool Perform whitening before PCA var_array : numpy array Array of variance for the maximum likelihood PCA algorithm var_func : function or numpy array If function, it will apply it to the dataset to obtain the var_array. Alternatively, it can a an array with the coefficients of a polynomy. polyfit : See also -------- plot_principal_components, plot_principal_components_maps, plot_lev """ # backup the original data if on_peaks: self._data_before_treatments = self.peak_chars.copy() else: self._data_before_treatments = self.data.copy() # Check for conflicting options and correct them when possible if (algorithm == 'mdp' or algorithm == 'NIPALS') and center is False: print \ """ The PCA algorithms from the MDP toolking (mdp and NIPALS) do not permit deactivating data centering. Therefore, the algorithm will proceed to center the data. """ center = True if algorithm == 'mlpca': if normalize_poissonian_noise is True: messages.warning( "It makes no sense to do normalize_poissonian_noise with " "the MLPCA algorithm. 
Therefore, " "normalize_poissonian_noise is set to False") normalize_poissonian_noise = False if output_dimension is None: messages.warning_exit( "With the mlpca algorithm the output_dimension must be expecified" ) if center is True and normalize_poissonian_noise is True: messages.warning( "Centering is not compatible with poissonian noise normalization\n" "Disabling centering") center = False if variance2one is True and normalize_poissonian_noise is True: messages.warning( "Variance normalization is not compatible with poissonian noise" "normalization.\n" "Disabling variance2one") variance2one = False # Apply pre-treatments # Centering if center is True: self.energy_center() # Variance normalization if variance2one is True: self.variance2one() # Transform the data in a line spectrum self._unfolded4pca = self.unfold_if_multidim() # Normalize the poissonian noise # Note that this function can change the masks if normalize_poissonian_noise is True: navigation_mask, signal_mask = \ self.normalize_poissonian_noise(navigation_mask = navigation_mask, signal_mask = signal_mask, return_masks = True) navigation_mask = self._correct_navigation_mask_when_unfolded( navigation_mask) messages.information('Performing principal components analysis') if on_peaks: dc = self.peak_chars else: # The data must be transposed both for Images and Spectra dc = self.data.T.squeeze() #set the output target (peak results or not?) 
target = self._get_target(on_peaks) # Transform the None masks in slices to get the right behaviour if navigation_mask is None: navigation_mask = slice(None) if signal_mask is None: signal_mask = slice(None) if algorithm == 'mdp' or algorithm == 'NIPALS': if algorithm == 'mdp': target.pca_node = mdp.nodes.PCANode( output_dim=output_dimension, svd=True) elif algorithm == 'NIPALS': target.pca_node = mdp.nodes.NIPALSNode( output_dim=output_dimension) # Train the node print "\nPerforming the PCA node training" print "This include variance normalizing" target.pca_node.train(dc[signal_mask, :][:, navigation_mask]) print "Performing PCA projection" pc = target.pca_node.execute(dc[:, navigation_mask]) pca_v = target.pca_node.v pca_V = target.pca_node.d target.output_dimension = output_dimension elif algorithm == 'svd': pca_v, pca_V = pca(dc[signal_mask, :][:, navigation_mask]) pc = np.dot(dc[:, navigation_mask], pca_v) elif algorithm == 'fast_svd': pca_v, pca_V = pca(dc[signal_mask, :][:, navigation_mask], fast=True, output_dimension=output_dimension) pc = np.dot(dc[:, navigation_mask], pca_v) elif algorithm == 'mlpca' or algorithm == 'fast_mlpca': print "Performing the MLPCA training" if output_dimension is None: messages.warning_exit( "For MLPCA it is mandatory to define the output_dimension") if var_array is None and var_func is None: messages.information('No variance array provided.' 
'Supposing poissonian data') var_array = dc.squeeze()[signal_mask, :][:, navigation_mask] if var_array is not None and var_func is not None: messages.warning_exit( "You have defined both the var_func and var_array keywords" "Please, define just one of them") if var_func is not None: if hasattr(var_func, '__call__'): var_array = var_func(dc[signal_mask, ...][:, navigation_mask]) else: try: var_array = np.polyval( polyfit, dc[signal_mask, navigation_mask]) except: messages.warning_exit( 'var_func must be either a function or an array' 'defining the coefficients of a polynom') if algorithm == 'mlpca': fast = False else: fast = True target.mlpca_output = mlpca( dc.squeeze()[signal_mask, :][:, navigation_mask], var_array.squeeze(), output_dimension, fast=fast) U, S, V, Sobj, ErrFlag = target.mlpca_output print "Performing PCA projection" pc = np.dot(dc[:, navigation_mask], V) pca_v = V pca_V = S**2 if output_dimension: print "trimming to %i dimensions" % output_dimension pca_v = pca_v[:, :output_dimension] pca_V = pca_V[:output_dimension] pc = pc[:, :output_dimension] target.pc = pc target.v = pca_v target.V = pca_V target.pca_algorithm = algorithm target.centered = center target.poissonian_noise_normalized = \ normalize_poissonian_noise target.output_dimension = output_dimension target.unfolded = self._unfolded4pca target.variance2one = variance2one if self._unfolded4pca is True: target.original_shape = self._shape_before_unfolding # Rescale the results if the noise was normalized if normalize_poissonian_noise is True: target.pc[signal_mask, :] *= self._root_bH target.v *= self._root_aG.T if isinstance(navigation_mask, slice): navigation_mask = None if isinstance(signal_mask, slice): signal_mask = None #undo any pre-treatments self.undo_treatments(on_peaks) # Set the pixels that were not processed to nan if navigation_mask is not None or not isinstance( navigation_mask, slice): v = np.zeros((dc.shape[1], target.v.shape[1]), dtype=target.v.dtype) v[navigation_mask == 
False, :] = np.nan v[navigation_mask, :] = target.v target.v = v if self._unfolded4pca is True: self.fold() self._unfolded4pca is False
def principal_components_analysis(self, normalize_poissonian_noise = False, algorithm = 'svd', output_dimension = None, navigation_mask = None, signal_mask = None, center = False, variance2one = False, var_array = None, var_func = None, polyfit = None, on_peaks=False): """Principal components analysis. The results are stored in self.mva_results Parameters ---------- normalize_poissonian_noise : bool If True, scale the SI to normalize Poissonian noise algorithm : {'svd', 'fast_svd', 'mlpca', 'fast_mlpca', 'mdp', 'NIPALS'} output_dimension : None or int number of PCA to keep navigation_mask : boolean numpy array signal_mask : boolean numpy array center : bool Perform energy centering before PCA variance2one : bool Perform whitening before PCA var_array : numpy array Array of variance for the maximum likelihood PCA algorithm var_func : function or numpy array If function, it will apply it to the dataset to obtain the var_array. Alternatively, it can a an array with the coefficients of a polynomy. polyfit : See also -------- plot_principal_components, plot_principal_components_maps, plot_lev """ # backup the original data if on_peaks: self._data_before_treatments = self.peak_chars.copy() else: self._data_before_treatments = self.data.copy() # Check for conflicting options and correct them when possible if (algorithm == 'mdp' or algorithm == 'NIPALS') and center is False: print \ """ The PCA algorithms from the MDP toolking (mdp and NIPALS) do not permit deactivating data centering. Therefore, the algorithm will proceed to center the data. """ center = True if algorithm == 'mlpca': if normalize_poissonian_noise is True: messages.warning( "It makes no sense to do normalize_poissonian_noise with " "the MLPCA algorithm. 
Therefore, " "normalize_poissonian_noise is set to False") normalize_poissonian_noise = False if output_dimension is None: messages.warning_exit( "With the mlpca algorithm the output_dimension must be expecified") if center is True and normalize_poissonian_noise is True: messages.warning( "Centering is not compatible with poissonian noise normalization\n" "Disabling centering") center = False if variance2one is True and normalize_poissonian_noise is True: messages.warning( "Variance normalization is not compatible with poissonian noise" "normalization.\n" "Disabling variance2one") variance2one = False # Apply pre-treatments # Centering if center is True: self.energy_center() # Variance normalization if variance2one is True: self.variance2one() # Transform the data in a line spectrum self._unfolded4pca = self.unfold_if_multidim() # Normalize the poissonian noise # Note that this function can change the masks if normalize_poissonian_noise is True: navigation_mask, signal_mask = \ self.normalize_poissonian_noise(navigation_mask = navigation_mask, signal_mask = signal_mask, return_masks = True) navigation_mask = self._correct_navigation_mask_when_unfolded(navigation_mask) messages.information('Performing principal components analysis') if on_peaks: dc=self.peak_chars else: # The data must be transposed both for Images and Spectra dc = self.data.T.squeeze() #set the output target (peak results or not?) 
target=self._get_target(on_peaks) # Transform the None masks in slices to get the right behaviour if navigation_mask is None: navigation_mask = slice(None) if signal_mask is None: signal_mask = slice(None) if algorithm == 'mdp' or algorithm == 'NIPALS': if algorithm == 'mdp': target.pca_node = mdp.nodes.PCANode( output_dim=output_dimension, svd = True) elif algorithm == 'NIPALS': target.pca_node = mdp.nodes.NIPALSNode( output_dim=output_dimension) # Train the node print "\nPerforming the PCA node training" print "This include variance normalizing" target.pca_node.train( dc[signal_mask,:][:,navigation_mask]) print "Performing PCA projection" pc = target.pca_node.execute(dc[:,navigation_mask]) pca_v = target.pca_node.v pca_V = target.pca_node.d target.output_dimension = output_dimension elif algorithm == 'svd': pca_v, pca_V = pca(dc[signal_mask,:][:,navigation_mask]) pc = np.dot(dc[:,navigation_mask], pca_v) elif algorithm == 'fast_svd': pca_v, pca_V = pca(dc[signal_mask,:][:,navigation_mask], fast = True, output_dimension = output_dimension) pc = np.dot(dc[:,navigation_mask], pca_v) elif algorithm == 'mlpca' or algorithm == 'fast_mlpca': print "Performing the MLPCA training" if output_dimension is None: messages.warning_exit( "For MLPCA it is mandatory to define the output_dimension") if var_array is None and var_func is None: messages.information('No variance array provided.' 
'Supposing poissonian data') var_array = dc.squeeze()[signal_mask,:][:,navigation_mask] if var_array is not None and var_func is not None: messages.warning_exit( "You have defined both the var_func and var_array keywords" "Please, define just one of them") if var_func is not None: if hasattr(var_func, '__call__'): var_array = var_func(dc[signal_mask,...][:,navigation_mask]) else: try: var_array = np.polyval(polyfit,dc[signal_mask, navigation_mask]) except: messages.warning_exit( 'var_func must be either a function or an array' 'defining the coefficients of a polynom') if algorithm == 'mlpca': fast = False else: fast = True target.mlpca_output = mlpca( dc.squeeze()[signal_mask,:][:,navigation_mask], var_array.squeeze(), output_dimension, fast = fast) U,S,V,Sobj, ErrFlag = target.mlpca_output print "Performing PCA projection" pc = np.dot(dc[:,navigation_mask], V) pca_v = V pca_V = S ** 2 if output_dimension: print "trimming to %i dimensions"%output_dimension pca_v = pca_v[:,:output_dimension] pca_V = pca_V[:output_dimension] pc = pc[:,:output_dimension] target.pc = pc target.v = pca_v target.V = pca_V target.pca_algorithm = algorithm target.centered = center target.poissonian_noise_normalized = \ normalize_poissonian_noise target.output_dimension = output_dimension target.unfolded = self._unfolded4pca target.variance2one = variance2one if self._unfolded4pca is True: target.original_shape = self._shape_before_unfolding # Rescale the results if the noise was normalized if normalize_poissonian_noise is True: target.pc[signal_mask,:] *= self._root_bH target.v *= self._root_aG.T if isinstance(navigation_mask, slice): navigation_mask = None if isinstance(signal_mask, slice): signal_mask = None #undo any pre-treatments self.undo_treatments(on_peaks) # Set the pixels that were not processed to nan if navigation_mask is not None or not isinstance(navigation_mask, slice): v = np.zeros((dc.shape[1], target.v.shape[1]), dtype = target.v.dtype) v[navigation_mask == False,:] = 
np.nan v[navigation_mask,:] = target.v target.v = v if self._unfolded4pca is True: self.fold() self._unfolded4pca is False
def normalize_poissonian_noise(self, navigation_mask=None, signal_mask=None, return_masks=False): """ Scales the SI following Surf. Interface Anal. 2004; 36: 203–212 to "normalize" the poissonian data for PCA analysis Parameters ---------- navigation_mask : boolen numpy array signal_mask : boolen numpy array """ messages.information( "Scaling the data to normalize the (presumably) Poissonian noise") # If energy axis is not first, it needs to be for MVA. refold = self.unfold_if_multidim() dc = self.data.T.squeeze().copy() navigation_mask = \ self._correct_navigation_mask_when_unfolded(navigation_mask) if navigation_mask is None: navigation_mask = slice(None) if signal_mask is None: signal_mask = slice(None) # Rescale the data to gaussianize the poissonian noise aG = dc[signal_mask, :][:, navigation_mask].sum(0).squeeze() bH = dc[signal_mask, :][:, navigation_mask].sum(1).squeeze() # Checks if any is negative if (aG < 0).any() or (bH < 0).any(): messages.warning_exit( "Data error: negative values\n" "Are you sure that the data follow a poissonian distribution?") # Update the spatial and energy masks so it does not include rows # or colums that sum zero. 
aG0 = (aG == 0) bH0 = (bH == 0) if aG0.any(): if isinstance(navigation_mask, slice): # Convert the slice into a mask before setting its values navigation_mask = np.ones((self.data.shape[1]), dtype='bool') # Set colums summing zero as masked navigation_mask[aG0] = False aG = aG[aG0 == False] if bH0.any(): if isinstance(signal_mask, slice): # Convert the slice into a mask before setting its values signal_mask = np.ones((self.data.shape[0]), dtype='bool') # Set rows summing zero as masked signal_mask[bH0] = False bH = bH[bH0 == False] self._root_aG = np.sqrt(aG)[np.newaxis, :] self._root_bH = np.sqrt(bH)[:, np.newaxis] temp = (dc[signal_mask, :][:, navigation_mask] / (self._root_aG * self._root_bH)) if isinstance(signal_mask, slice) or isinstance( navigation_mask, slice): dc[signal_mask, navigation_mask] = temp else: mask3D = signal_mask[:, np.newaxis] * \ navigation_mask[np.newaxis, :] dc[mask3D] = temp.ravel() # TODO - dc was never modifying self.data - was normalization ever # really getting applied? Comment next lines as necessary. self.data = dc.T.copy() # end normalization write to self.data. if refold is True: print "Automatically refolding the SI after scaling" self.fold() if return_masks is True: if isinstance(navigation_mask, slice): navigation_mask = None if isinstance(signal_mask, slice): signal_mask = None return navigation_mask, signal_mask
def load(filenames=None,
         record_by=None,
         signal_type=None,
         signal_origin=None,
         stack=False,
         stack_axis=None,
         new_axis_name="stack_element",
         mmap=False,
         mmap_dir=None,
         **kwds):
    """
    Load potentially multiple supported files into an hyperspy structure.

    Supported formats: HDF5, msa, Gatan dm3, Ripple (rpl+raw) FEI ser and
    emi and hdf5, tif and a number of image formats.

    Any extra keyword is passed to the corresponding reader. For
    available options see their individual documentation.

    Parameters
    ----------
    filenames : None, str or list of strings
        The filename to be loaded. If None, a window will open to select
        a file to load. If a valid filename is passed in that single
        file is loaded. If multiple file names are passed in
        a list, a list of objects or a single object containing the data
        of the individual files stacked are returned. This behaviour is
        controlled by the `stack` parameter (see below). Multiple
        files can be loaded by using simple shell-style wildcards,
        e.g. 'my_file*.msa' loads all the files that start
        by 'my_file' and have the '.msa' extension.
    record_by : {None, 'spectrum', 'image', ""}
        The value provided may determine the Signal subclass assigned to
        the data.
        If None, the value is read or guessed from the file. Any other
        value overrides the value stored in the file if any.
        If "spectrum" load the data in a Spectrum (sub)class.
        If "image" load the data in an Image (sub)class.
        If "" (empty string) load the data in a Signal class.
    signal_type : {None, "EELS", "EDS_TEM", "EDS_SEM", "", str}
        The acronym that identifies the signal type.
        The value provided may determine the Signal subclass assigned to
        the data.
        If None the value is read/guessed from the file. Any other value
        overrides the value stored in the file if any.
        For electron energy-loss spectroscopy use "EELS".
        For energy dispersive x-rays use "EDS_TEM"
        if acquired from an electron-transparent sample - as it is
        usually the case in a transmission electron microscope (TEM) -,
        "EDS_SEM" if acquired from a non electron-transparent sample
        - as it is usually the case in a scanning electron microscope
        (SEM) -.
        If "" (empty string) the value is not read from the file and is
        considered undefined.
    signal_origin : {None, "experiment", "simulation", ""}
        Defines the origin of the signal.
        The value provided may determine the Signal subclass assigned to
        the data.
        If None the value is read/guessed from the file. Any other value
        overrides the value stored in the file if any.
        Use "experiment" if loading experimental data.
        Use "simulation" if loading simulated data.
        If "" (empty string) the value is not read from the file and is
        considered undefined.
    stack : bool
        If True and multiple filenames are passed in, stacking all
        the data into a single object is attempted. All files must match
        in shape. It is possible to store the data in a memory mapped
        temporary file instead of in memory setting mmap_mode. The title
        is set to the name of the folder containing the files.
    stack_axis : {None, int, str}
        If None, the signals are stacked over a new axis. The data must
        have the same dimensions. Otherwise the signals are stacked over
        the axis given by its integer index or its name. The data must
        have the same shape, except in the dimension corresponding to
        `axis`.
    new_axis_name : string
        The name of the new axis when `axis` is None.
        If an axis with this name already exists it automatically
        appends '-i', where `i` are integers, until it finds a name that
        is not yet in use.
    mmap : bool
        If True and stack is True, then the data is stored
        in a memory-mapped temporary file. The memory-mapped data is
        stored on disk, and not directly loaded into memory.  Memory
        mapping is especially useful for accessing small fragments of
        large files without reading the entire file into memory.
    mmap_dir : string
        If mmap_dir is not None, and stack and mmap are True, the memory
        mapped file will be created in the given directory, otherwise
        the default directory is used.

    Returns
    -------
    Signal instance or list of signal instances

    Examples
    --------
    Loading a single file providing the signal type:

    >>> d = load('file.dm3', signal_type='EDS_TEM')

    Loading a single file and overriding its default record_by:

    >>> d = load('file.dm3', record_by='Image')

    Loading multiple files:

    >>> d = load('file1.dm3','file2.dm3')

    Loading multiple files matching the pattern:

    >>> d = load('file*.dm3')

    """
    kwds['record_by'] = record_by
    kwds['signal_type'] = signal_type
    kwds['signal_origin'] = signal_origin
    if filenames is None:
        if hyperspy.defaults_parser.preferences.General.interactive is True:
            # Let the user pick the file(s) from a GUI dialog
            from hyperspy.gui.tools import Load
            load_ui = Load()
            load_ui.edit_traits()
            if load_ui.filename:
                filenames = load_ui.filename
        else:
            raise ValueError("No file provided to reader and "
                             "interactive mode is disabled")
        if filenames is None:
            raise ValueError("No file provided to reader")

    if isinstance(filenames, basestring):
        # Expand shell-style wildcards and keep existing files only
        filenames = natsorted(
            [f for f in glob.glob(filenames) if os.path.isfile(f)])
        if not filenames:
            raise ValueError('No file name matches this pattern')
    elif not isinstance(filenames, (list, tuple)):
        raise ValueError(
            'The filenames parameter must be a list, tuple, string or None')
    if not filenames:
        # BUGFIX: an unreachable `return None` used to follow this raise
        raise ValueError('No file provided to reader.')
    if len(filenames) > 1:
        messages.information('Loading individual files')
    if stack is True:
        # Load every file, then stack all the signals into one
        signal = []
        for filename in filenames:
            obj = load_single_file(filename, **kwds)
            signal.append(obj)
        signal = hyperspy.utils.stack(signal,
                                      axis=stack_axis,
                                      new_axis_name=new_axis_name,
                                      mmap=mmap, mmap_dir=mmap_dir)
        # Title the stack after the folder that contains the files
        signal.mapped_parameters.title = \
            os.path.split(
                os.path.split(
                    os.path.abspath(filenames[0])
                )[0]
            )[1]
        messages.information('Individual files loaded correctly')
        signal._print_summary()
        objects = [signal, ]
    else:
        objects = [load_single_file(filename, **kwds)
                   for filename in filenames]

    if hyperspy.defaults_parser.preferences.General.plot_on_load:
        for obj in objects:
            obj.plot()
    if len(objects) == 1:
        # A single signal is returned bare, not wrapped in a list
        objects = objects[0]
    return objects
def decomposition(self, normalize_poissonian_noise=False, algorithm='svd', output_dimension=None, centre=None, auto_transpose=True, navigation_mask=None, signal_mask=None, var_array=None, var_func=None, polyfit=None, reproject=None, **kwargs): """Decomposition with a choice of algorithms The results are stored in self.learning_results Parameters ---------- normalize_poissonian_noise : bool If True, scale the SI to normalize Poissonian noise algorithm : 'svd' | 'fast_svd' | 'mlpca' | 'fast_mlpca' | 'nmf' | 'sparse_pca' | 'mini_batch_sparse_pca' output_dimension : None or int number of components to keep/calculate centre : None | 'variables' | 'trials' If None no centring is applied. If 'variable' the centring will be performed in the variable axis. If 'trials', the centring will be performed in the 'trials' axis. It only has effect when using the svd or fast_svd algorithms auto_transpose : bool If True, automatically transposes the data to boost performance. Only has effect when using the svd of fast_svd algorithms. navigation_mask : boolean numpy array The navigation locations marked as True are not used in the decompostion. signal_mask : boolean numpy array The signal locations marked as True are not used in the decomposition. var_array : numpy array Array of variance for the maximum likelihood PCA algorithm var_func : function or numpy array If function, it will apply it to the dataset to obtain the var_array. Alternatively, it can a an array with the coefficients of a polynomial. polyfit : reproject : None | signal | navigation | both If not None, the results of the decomposition will be projected in the selected masked area. See also -------- plot_decomposition_factors, plot_decomposition_loadings, plot_lev """ # Check if it is the wrong data type if self.data.dtype.char not in ['e', 'f', 'd']: # If not float messages.warning( 'To perform a decomposition the data must be of the float ' 'type. You can change the type using the change_dtype method' ' e.g. 
s.change_dtype(\'float64\')\n' 'Nothing done.') return if self.axes_manager.navigation_size < 2: raise AttributeError("It is not possible to decompose a dataset " "with navigation_size < 2") # backup the original data self._data_before_treatments = self.data.copy() if algorithm == 'mlpca': if normalize_poissonian_noise is True: messages.warning( "It makes no sense to do normalize_poissonian_noise with " "the MLPCA algorithm. Therefore, " "normalize_poissonian_noise is set to False") normalize_poissonian_noise = False if output_dimension is None: raise ValueError("With the mlpca algorithm the " "output_dimension must be expecified") # Apply pre-treatments # Transform the data in a line spectrum self._unfolded4decomposition = self.unfold() try: if hasattr(navigation_mask, 'ravel'): navigation_mask = navigation_mask.ravel() if hasattr(signal_mask, 'ravel'): signal_mask = signal_mask.ravel() # Normalize the poissonian noise # TODO this function can change the masks and this can cause # problems when reprojecting if normalize_poissonian_noise is True: self.normalize_poissonian_noise( navigation_mask=navigation_mask, signal_mask=signal_mask,) messages.information('Performing decomposition analysis') # The rest of the code assumes that the first data axis # is the navigation axis. We transpose the data if that is not the # case. dc = (self.data if self.axes_manager[0].index_in_array == 0 else self.data.T) # set the output target (peak results or not?) target = self.learning_results # Transform the None masks in slices to get the right behaviour if navigation_mask is None: navigation_mask = slice(None) else: navigation_mask = ~navigation_mask if signal_mask is None: signal_mask = slice(None) else: signal_mask = ~signal_mask # WARNING: signal_mask and navigation_mask values are now their # negaties i.e. True -> False and viceversa. 
However, the # stored value (at the end of the method) coincides with the # input masks # Reset the explained_variance which is not set by all the # algorithms explained_variance = None explained_variance_ratio = None mean = None if algorithm == 'svd': factors, loadings, explained_variance, mean = svd_pca( dc[:, signal_mask][navigation_mask, :], centre=centre, auto_transpose=auto_transpose) elif algorithm == 'fast_svd': factors, loadings, explained_variance, mean = svd_pca( dc[:, signal_mask][navigation_mask, :], fast=True, output_dimension=output_dimension, centre=centre, auto_transpose=auto_transpose) elif algorithm == 'sklearn_pca': if import_sklearn.sklearn_installed is False: raise ImportError( 'sklearn is not installed. Nothing done') sk = import_sklearn.sklearn.decomposition.PCA(**kwargs) sk.n_components = output_dimension loadings = sk.fit_transform(( dc[:, signal_mask][navigation_mask, :])) factors = sk.components_.T explained_variance = sk.explained_variance_ mean = sk.mean_ centre = 'trials' elif algorithm == 'nmf': if import_sklearn.sklearn_installed is False: raise ImportError( 'sklearn is not installed. Nothing done') sk = import_sklearn.sklearn.decomposition.NMF(**kwargs) sk.n_components = output_dimension loadings = sk.fit_transform(( dc[:, signal_mask][navigation_mask, :])) factors = sk.components_.T elif algorithm == 'sparse_pca': if import_sklearn.sklearn_installed is False: raise ImportError( 'sklearn is not installed. Nothing done') sk = import_sklearn.sklearn.decomposition.SparsePCA( output_dimension, **kwargs) loadings = sk.fit_transform( dc[:, signal_mask][navigation_mask, :]) factors = sk.components_.T elif algorithm == 'mini_batch_sparse_pca': if import_sklearn.sklearn_installed is False: raise ImportError( 'sklearn is not installed. 
Nothing done') sk = import_sklearn.sklearn.decomposition.MiniBatchSparsePCA( output_dimension, **kwargs) loadings = sk.fit_transform( dc[:, signal_mask][navigation_mask, :]) factors = sk.components_.T elif algorithm == 'mlpca' or algorithm == 'fast_mlpca': print "Performing the MLPCA training" if output_dimension is None: raise ValueError( "For MLPCA it is mandatory to define the " "output_dimension") if var_array is None and var_func is None: messages.information('No variance array provided.' 'Supposing poissonian data') var_array = dc[:, signal_mask][navigation_mask, :] if var_array is not None and var_func is not None: raise ValueError( "You have defined both the var_func and var_array " "keywords." "Please, define just one of them") if var_func is not None: if hasattr(var_func, '__call__'): var_array = var_func( dc[signal_mask, ...][:, navigation_mask]) else: try: var_array = np.polyval( polyfit, dc[ signal_mask, navigation_mask]) except: raise ValueError( 'var_func must be either a function or an ' 'array defining the coefficients of a polynom') if algorithm == 'mlpca': fast = False else: fast = True U, S, V, Sobj, ErrFlag = mlpca( dc[:, signal_mask][navigation_mask, :], var_array, output_dimension, fast=fast) loadings = U * S factors = V explained_variance_ratio = S ** 2 / Sobj explained_variance = S ** 2 / len(factors) else: raise ValueError('Algorithm not recognised. 
' 'Nothing done') # We must calculate the ratio here because otherwise the sum # information can be lost if the user call # crop_decomposition_dimension if explained_variance is not None and \ explained_variance_ratio is None: explained_variance_ratio = \ explained_variance / explained_variance.sum() # Store the results in learning_results target.factors = factors target.loadings = loadings target.explained_variance = explained_variance target.explained_variance_ratio = explained_variance_ratio target.decomposition_algorithm = algorithm target.poissonian_noise_normalized = \ normalize_poissonian_noise target.output_dimension = output_dimension target.unfolded = self._unfolded4decomposition target.centre = centre target.mean = mean if output_dimension and factors.shape[1] != output_dimension: target.crop_decomposition_dimension(output_dimension) # Delete the unmixing information, because it'll refer to a # previous decomposition target.unmixing_matrix = None target.bss_algorithm = None if self._unfolded4decomposition is True: folding = \ self.metadata._HyperSpy.Folding target.original_shape = folding.original_shape # Reproject if mean is None: mean = 0 if reproject in ('navigation', 'both'): if algorithm not in ('nmf', 'sparse_pca', 'mini_batch_sparse_pca'): loadings_ = np.dot(dc[:, signal_mask] - mean, factors) else: loadings_ = sk.transform(dc[:, signal_mask]) target.loadings = loadings_ if reproject in ('signal', 'both'): if algorithm not in ('nmf', 'sparse_pca', 'mini_batch_sparse_pca'): factors = np.dot(np.linalg.pinv(loadings), dc[navigation_mask, :] - mean).T target.factors = factors else: messages.information("Reprojecting the signal is not yet " "supported for this algorithm") if reproject == 'both': reproject = 'signal' else: reproject = None # Rescale the results if the noise was normalized if normalize_poissonian_noise is True: target.factors[:] *= self._root_bH.T target.loadings[:] *= self._root_aG # Set the pixels that were not processed to nan if not 
isinstance(signal_mask, slice): # Store the (inverted, as inputed) signal mask target.signal_mask = ~signal_mask.reshape( self.axes_manager._signal_shape_in_array) if reproject not in ('both', 'signal'): factors = np.zeros((dc.shape[-1], target.factors.shape[1])) factors[signal_mask, :] = target.factors factors[~signal_mask, :] = np.nan target.factors = factors if not isinstance(navigation_mask, slice): # Store the (inverted, as inputed) navigation mask target.navigation_mask = ~navigation_mask.reshape( self.axes_manager._navigation_shape_in_array) if reproject not in ('both', 'navigation'): loadings = np.zeros( (dc.shape[0], target.loadings.shape[1])) loadings[navigation_mask, :] = target.loadings loadings[~navigation_mask, :] = np.nan target.loadings = loadings finally: # undo any pre-treatments self.undo_treatments() if self._unfolded4decomposition is True: self.fold() self._unfolded4decomposition is False
def load(filenames=None, record_by=None, signal_type=None, stack=False, mmap=False, mmap_dir=None, **kwds): """ Load potentially multiple supported file into an hyperspy structure Supported formats: HDF5, msa, Gatan dm3, Ripple (rpl+raw) FEI ser and emi and hdf5, tif and a number of image formats. Any extra keyword is passed to the corresponsing reader. For available options see their individual documentation. Parameters ---------- filenames : None, str or list of strings The filename to be loaded. If None, a window will open to select a file to load. If a valid filename is passed in that single file is loaded. If multiple file names are passed in a list, a list of objects or a single object containing the data of the individual files stacked are returned. This behaviour is controlled by the `stack` parameter (see bellow). Multiple files can be loaded by using simple shell-style wildcards, e.g. 'my_file*.msa' loads all the files that starts by 'my_file' and has the '.msa' extension. record_by : None | 'spectrum' | 'image' Manually set the way in which the data will be read. Possible values are 'spectrum' or 'image'. signal_type : str Manually set the signal type of the data. Although only setting signal type to 'EELS' will currently change the way the data is loaded, it is good practice to set this parameter so it can be stored when saving the file. Please note that, if the signal_type is already defined in the file the information will be overriden without warning. stack : bool If True and multiple filenames are passed in, stacking all the data into a single object is attempted. All files must match in shape. It is possible to store the data in a memory mapped temporary file instead of in memory setting mmap_mode. mmap: bool If True and stack is True, then the data is stored in a memory-mapped temporary file.The memory-mapped data is stored on disk, and not directly loaded into memory. 
Memory mapping is especially useful for accessing small fragments of large files without reading the entire file into memory. mmap_dir : string If mmap_dir is not None, and stack and mmap are True, the memory mapped file will be created in the given directory, otherwise the default directory is used. Returns ------- Signal instance or list of signal instances Examples -------- Loading a single file providing the signal type: >>> d = load('file.dm3', signal_type='XPS') Loading a single file and overriding its default record_by: >>> d = load('file.dm3', record_by='Image') Loading multiple files: >>> d = load('file1.dm3','file2.dm3') Loading multiple files matching the pattern: >>>d = load('file*.dm3') """ if filenames is None: if hyperspy.defaults_parser.preferences.General.interactive is True: load_ui = Load() load_ui.edit_traits() if load_ui.filename: filenames = load_ui.filename else: raise ValueError("No file provided to reader and " "interactive mode is disabled") if filenames is None: raise ValueError("No file provided to reader") if isinstance(filenames, basestring): filenames=natsorted([f for f in glob.glob(filenames) if os.path.isfile(f)]) if not filenames: raise ValueError('No file name matches this pattern') elif not isinstance(filenames, (list, tuple)): raise ValueError( 'The filenames parameter must be a list, tuple, string or None') if not filenames: raise ValueError('No file provided to reader.') return None else: if len(filenames) > 1: messages.information('Loading individual files') if stack is True: original_shape = None for i, filename in enumerate(filenames): obj = load_single_file(filename, output_level=0,**kwds) if original_shape is None: original_shape = obj.data.shape record_by = obj.mapped_parameters.record_by stack_shape = tuple([len(filenames),]) + original_shape tempf = None if mmap is False: data = np.empty(stack_shape, dtype=obj.data.dtype) else: #filename = os.path.join(tempfile.mkdtemp(), #'newfile.dat') tempf = 
tempfile.NamedTemporaryFile( dir=mmap_dir) data = np.memmap(tempf, dtype=obj.data.dtype, mode = 'w+', shape=stack_shape,) signal = type(obj)( {'data' : data}) # Store the temporary file in the signal class to # avoid its deletion when garbage collecting if tempf is not None: signal._data_temporary_file = tempf signal.axes_manager.axes[1:] = obj.axes_manager.axes signal.axes_manager._set_axes_index_in_array_from_position() eaxis = signal.axes_manager.axes[0] eaxis.name = 'stack_element' eaxis.navigate = True signal.mapped_parameters = obj.mapped_parameters signal.mapped_parameters.original_filename = '' signal.mapped_parameters.title = \ os.path.split(os.path.split( os.path.abspath(filenames[0]))[0])[1] signal.original_parameters = DictionaryBrowser({}) signal.original_parameters.add_node('stack_elements') if obj.data.shape != original_shape: raise IOError( "Only files with data of the same shape can be stacked") signal.data[i,...] = obj.data signal.original_parameters.stack_elements.add_node( 'element%i' % i) node = signal.original_parameters.stack_elements[ 'element%i' % i] node.original_parameters = \ obj.original_parameters.as_dictionary() node.mapped_parameters = \ obj.mapped_parameters.as_dictionary() del obj messages.information('Individual files loaded correctly') print signal objects = [signal,] else: objects=[load_single_file(filename, output_level=0,**kwds) for filename in filenames] if hyperspy.defaults_parser.preferences.General.plot_on_load: for obj in objects: obj.plot() if len(objects) == 1: objects = objects[0] return objects
def load(filenames=None,
         record_by=None,
         signal_type=None,
         signal_origin=None,
         stack=False,
         stack_axis=None,
         new_axis_name="stack_element",
         mmap=False,
         mmap_dir=None,
         **kwds):
    """
    Load potentially multiple supported files into a hyperspy structure.

    Supported formats: HDF5, msa, Gatan dm3, Ripple (rpl+raw),
    FEI ser and emi and hdf5, tif and a number of image formats.

    Any extra keyword is passed to the corresponding reader. For
    available options see their individual documentation.

    Parameters
    ----------
    filenames : None, str or list of strings
        The filename to be loaded. If None, a window will open to
        select a file to load. If a valid filename is passed in that
        single file is loaded. If multiple file names are passed in
        a list, a list of objects or a single object containing the
        data of the individual files stacked are returned. This
        behaviour is controlled by the `stack` parameter (see below).
        Multiple files can be loaded by using simple shell-style
        wildcards, e.g. 'my_file*.msa' loads all the files that start
        with 'my_file' and have the '.msa' extension.
    record_by : {None, 'spectrum', 'image', ""}
        The value provided may determine the Signal subclass assigned
        to the data.
        If None, the value is read or guessed from the file. Any other
        value overrides the value stored in the file if any.
        If "spectrum" load the data in a Spectrum (sub)class.
        If "image" load the data in an Image (sub)class.
        If "" (empty string) load the data in a Signal class.
    signal_type : {None, "EELS", "EDS_TEM", "EDS_SEM", "", str}
        The acronym that identifies the signal type.
        The value provided may determine the Signal subclass assigned
        to the data.
        If None the value is read/guessed from the file. Any other
        value overrides the value stored in the file if any.
        For electron energy-loss spectroscopy use "EELS".
        For energy dispersive x-rays use "EDS_TEM" if acquired from an
        electron-transparent sample - as it is usually the case in a
        transmission electron microscope (TEM) -, "EDS_SEM" if
        acquired from a non electron-transparent sample - as it is
        usually the case in a scanning electron microscope (SEM) -.
        If "" (empty string) the value is not read from the file and
        is considered undefined.
    signal_origin : {None, "experiment", "simulation", ""}
        Defines the origin of the signal.
        The value provided may determine the Signal subclass assigned
        to the data.
        If None the value is read/guessed from the file. Any other
        value overrides the value stored in the file if any.
        Use "experiment" if loading experimental data.
        Use "simulation" if loading simulated data.
        If "" (empty string) the value is not read from the file and
        is considered undefined.
    stack : bool
        If True and multiple filenames are passed in, stacking all
        the data into a single object is attempted. All files must
        match in shape. It is possible to store the data in a memory
        mapped temporary file instead of in memory setting mmap_mode.
        The title is set to the name of the folder containing the
        files.
    stack_axis : {None, int, str}
        If None, the signals are stacked over a new axis. The data
        must have the same dimensions. Otherwise the signals are
        stacked over the axis given by its integer index or its name.
        The data must have the same shape, except in the dimension
        corresponding to `axis`.
    new_axis_name : string
        The name of the new axis when `axis` is None.
        If an axis with this name already exists it automatically
        appends '-i', where `i` are integers, until it finds a name
        that is not yet in use.
    mmap : bool
        If True and stack is True, then the data is stored in a
        memory-mapped temporary file. The memory-mapped data is stored
        on disk, and not directly loaded into memory.  Memory mapping
        is especially useful for accessing small fragments of large
        files without reading the entire file into memory.
    mmap_dir : string
        If mmap_dir is not None, and stack and mmap are True, the
        memory mapped file will be created in the given directory,
        otherwise the default directory is used.

    Returns
    -------
    Signal instance or list of signal instances

    Examples
    --------
    Loading a single file providing the signal type:

    >>> d = load('file.dm3', signal_type='EDS_TEM')

    Loading a single file and overriding its default record_by:

    >>> d = load('file.dm3', record_by='Image')

    Loading multiple files:

    >>> d = load('file1.dm3','file2.dm3')

    Loading multiple files matching the pattern:

    >>> d = load('file*.dm3')

    """
    # Forward the signal-typing options to the single-file loader
    # through kwds so every file gets the same treatment.
    kwds['record_by'] = record_by
    kwds['signal_type'] = signal_type
    kwds['signal_origin'] = signal_origin
    if filenames is None:
        # No filename given: fall back to the interactive file dialog
        # when the preferences allow it.
        if hyperspy.defaults_parser.preferences.General.interactive is True:
            from hyperspy.gui.tools import Load
            load_ui = Load()
            load_ui.edit_traits()
            if load_ui.filename:
                filenames = load_ui.filename
        else:
            raise ValueError("No file provided to reader and "
                             "interactive mode is disabled")
        if filenames is None:
            raise ValueError("No file provided to reader")
    if isinstance(filenames, basestring):
        # Expand shell-style wildcards and sort the matches naturally
        # so e.g. file2 comes before file10.
        filenames = natsorted([f for f in glob.glob(filenames)
                               if os.path.isfile(f)])
        if not filenames:
            raise ValueError('No file name matches this pattern')
    elif not isinstance(filenames, (list, tuple)):
        raise ValueError(
            'The filenames parameter must be a list, tuple, string or None')
    if not filenames:
        # NOTE: the original code had an unreachable `return None` after
        # this raise; it has been removed as dead code.
        raise ValueError('No file provided to reader.')
    else:
        if len(filenames) > 1:
            messages.information('Loading individual files')
        if stack is True:
            # Load all files first, then delegate the actual stacking
            # (including memory mapping) to hyperspy.utils.stack.
            signal = []
            for i, filename in enumerate(filenames):
                obj = load_single_file(filename, **kwds)
                signal.append(obj)
            signal = hyperspy.utils.stack(signal,
                                          axis=stack_axis,
                                          new_axis_name=new_axis_name,
                                          mmap=mmap, mmap_dir=mmap_dir)
            # Title the stack after the folder that contains the
            # first file.
            signal.metadata.General.title = \
                os.path.split(
                    os.path.split(
                        os.path.abspath(filenames[0])
                    )[0]
                )[1]
            messages.information('Individual files loaded correctly')
            signal._print_summary()
            objects = [signal, ]
        else:
            objects = [load_single_file(filename, **kwds)
                       for filename in filenames]

        if hyperspy.defaults_parser.preferences.Plot.plot_on_load:
            for obj in objects:
                obj.plot()
        if len(objects) == 1:
            # A single file yields a single signal, not a list.
            objects = objects[0]
    return objects
def decomposition(self, normalize_poissonian_noise=False, algorithm = 'svd', output_dimension=None, centre=None, auto_transpose=True, navigation_mask=None, signal_mask=None, var_array=None, var_func=None, polyfit=None, reproject=None, **kwargs): """Decomposition with a choice of algorithms The results are stored in self.learning_results Parameters ---------- normalize_poissonian_noise : bool If True, scale the SI to normalize Poissonian noise algorithm : 'svd' | 'fast_svd' | 'mlpca' | 'fast_mlpca' | 'nmf' | 'sparse_pca' | 'mini_batch_sparse_pca' output_dimension : None or int number of components to keep/calculate centre : None | 'variables' | 'trials' If None no centring is applied. If 'variable' the centring will be performed in the variable axis. If 'trials', the centring will be performed in the 'trials' axis. It only has effect when using the svd or fast_svd algorithms auto_transpose : bool If True, automatically transposes the data to boost performance. Only has effect when using the svd of fast_svd algorithms. navigation_mask : boolean numpy array The navigation locations marked as True are not used in the decompostion. signal_mask : boolean numpy array The signal locations marked as True are not used in the decomposition. var_array : numpy array Array of variance for the maximum likelihood PCA algorithm var_func : function or numpy array If function, it will apply it to the dataset to obtain the var_array. Alternatively, it can a an array with the coefficients of a polynomial. polyfit : reproject : None | signal | navigation | both If not None, the results of the decomposition will be projected in the selected masked area. See also -------- plot_decomposition_factors, plot_decomposition_loadings, plot_lev """ # Check if it is the wrong data type if self.data.dtype.char not in ['e', 'f', 'd']: # If not float messages.warning( 'To perform a decomposition the data must be of the float type.' ' You can change the type using the change_dtype method' ' e.g. 
s.change_dtype(\'float64\')\n' 'Nothing done.') return # backup the original data self._data_before_treatments = self.data.copy() if algorithm == 'mlpca': if normalize_poissonian_noise is True: messages.warning( "It makes no sense to do normalize_poissonian_noise with " "the MLPCA algorithm. Therefore, " "normalize_poissonian_noise is set to False") normalize_poissonian_noise = False if output_dimension is None: messages.warning_exit("With the mlpca algorithm the " "output_dimension must be expecified") # Apply pre-treatments # Transform the data in a line spectrum self._unfolded4decomposition = self.unfold_if_multidim() try: if hasattr(navigation_mask, 'ravel'): navigation_mask = navigation_mask.ravel() if hasattr(signal_mask, 'ravel'): signal_mask = signal_mask.ravel() # Normalize the poissonian noise # TODO this function can change the masks and this can cause # problems when reprojecting if normalize_poissonian_noise is True: self.normalize_poissonian_noise( navigation_mask=navigation_mask, signal_mask=signal_mask,) messages.information('Performing decomposition analysis') dc = self.data #set the output target (peak results or not?) target = self.learning_results # Transform the None masks in slices to get the right behaviour if navigation_mask is None: navigation_mask = slice(None) else: navigation_mask = ~navigation_mask if signal_mask is None: signal_mask = slice(None) else: signal_mask = ~signal_mask # WARNING: signal_mask and navigation_mask values are now their # negaties i.e. True -> False and viceversa. 
However, the # stored value (at the end of the method) coincides with the # input masks # Reset the explained_variance which is not set by all the # algorithms explained_variance = None explained_variance_ratio = None mean = None if algorithm == 'svd': factors, loadings, explained_variance, mean = svd_pca( dc[:,signal_mask][navigation_mask,:], centre = centre, auto_transpose = auto_transpose) elif algorithm == 'fast_svd': factors, loadings, explained_variance, mean = svd_pca( dc[:,signal_mask][navigation_mask,:], fast=True, output_dimension=output_dimension, centre=centre, auto_transpose=auto_transpose) elif algorithm == 'sklearn_pca': if sklearn_installed is False: raise ImportError( 'sklearn is not installed. Nothing done') sk = sklearn.decomposition.PCA(**kwargs) sk.n_components = output_dimension loadings = sk.fit_transform(( dc[:,signal_mask][navigation_mask,:])) factors = sk.components_.T explained_variance = sk.explained_variance_ mean = sk.mean_ centre = 'trials' elif algorithm == 'nmf': if sklearn_installed is False: raise ImportError( 'sklearn is not installed. Nothing done') sk = sklearn.decomposition.NMF(**kwargs) sk.n_components = output_dimension loadings = sk.fit_transform(( dc[:,signal_mask][navigation_mask,:])) factors = sk.components_.T elif algorithm == 'sparse_pca': if sklearn_installed is False: raise ImportError( 'sklearn is not installed. Nothing done') sk = sklearn.decomposition.SparsePCA( output_dimension, **kwargs) loadings = sk.fit_transform( dc[:,signal_mask][navigation_mask,:]) factors = sk.components_.T elif algorithm == 'mini_batch_sparse_pca': if sklearn_installed is False: raise ImportError( 'sklearn is not installed. 
Nothing done') sk = sklearn.decomposition.MiniBatchSparsePCA( output_dimension, **kwargs) loadings = sk.fit_transform( dc[:,signal_mask][navigation_mask,:]) factors = sk.components_.T elif algorithm == 'mlpca' or algorithm == 'fast_mlpca': print "Performing the MLPCA training" if output_dimension is None: messages.warning_exit( "For MLPCA it is mandatory to define the " "output_dimension") if var_array is None and var_func is None: messages.information('No variance array provided.' 'Supposing poissonian data') var_array = dc[:,signal_mask][navigation_mask,:] if var_array is not None and var_func is not None: messages.warning_exit( "You have defined both the var_func and var_array " "keywords." "Please, define just one of them") if var_func is not None: if hasattr(var_func, '__call__'): var_array = var_func( dc[signal_mask,...][:,navigation_mask]) else: try: var_array = np.polyval(polyfit,dc[signal_mask, navigation_mask]) except: messages.warning_exit( 'var_func must be either a function or an array' 'defining the coefficients of a polynom') if algorithm == 'mlpca': fast = False else: fast = True U,S,V,Sobj, ErrFlag = mlpca( dc[:,signal_mask][navigation_mask,:], var_array, output_dimension, fast = fast) loadings = U * S factors = V explained_variance_ratio = S ** 2 / Sobj explained_variance = S ** 2 / len(factors) else: raise ValueError('Algorithm not recognised. 
' 'Nothing done') # We must calculate the ratio here because otherwise the sum # information can be lost if the user call # crop_decomposition_dimension if explained_variance is not None and \ explained_variance_ratio is None: explained_variance_ratio = \ explained_variance / explained_variance.sum() # Store the results in learning_results target.factors = factors target.loadings = loadings target.explained_variance = explained_variance target.explained_variance_ratio = explained_variance_ratio target.decomposition_algorithm = algorithm target.poissonian_noise_normalized = \ normalize_poissonian_noise target.output_dimension = output_dimension target.unfolded = self._unfolded4decomposition target.centre = centre target.mean = mean if output_dimension and factors.shape[1] != output_dimension: target.crop_decomposition_dimension(output_dimension) # Delete the unmixing information, because it'll refer to a previous # decompositions target.unmixing_matrix = None target.bss_algorithm = None if self._unfolded4decomposition is True: folding = \ self.mapped_parameters._internal_parameters.folding target.original_shape = folding.original_shape # Reproject if mean is None: mean = 0 if reproject in ('navigation', 'both'): if algorithm not in ('nmf', 'sparse_pca', 'mini_batch_sparse_pca'): loadings_ = np.dot(dc[:,signal_mask] - mean, factors) else: loadings_ = sk.transform(dc[:,signal_mask]) target.loadings = loadings_ if reproject in ('signal', 'both'): if algorithm not in ('nmf', 'sparse_pca', 'mini_batch_sparse_pca'): factors = np.dot(np.linalg.pinv(loadings), dc[navigation_mask,:] - mean).T target.factors = factors else: messages.information("Reprojecting the signal is not yet " "supported for this algorithm") if reproject == 'both': reproject = 'signal' else: reproject = None # Rescale the results if the noise was normalized if normalize_poissonian_noise is True: target.factors[:] *= self._root_bH.T target.loadings[:] *= self._root_aG # Set the pixels that were not 
processed to nan if not isinstance(signal_mask, slice): # Store the (inverted, as inputed) signal mask target.signal_mask = ~signal_mask.reshape( self.axes_manager._signal_shape_in_array) if reproject not in ('both', 'signal'): factors = np.zeros((dc.shape[-1], target.factors.shape[1])) factors[signal_mask == True,:] = target.factors factors[signal_mask == False,:] = np.nan target.factors = factors if not isinstance(navigation_mask, slice): # Store the (inverted, as inputed) navigation mask target.navigation_mask = ~navigation_mask.reshape( self.axes_manager._navigation_shape_in_array) if reproject not in ('both', 'navigation'): loadings = np.zeros((dc.shape[0], target.loadings.shape[1])) loadings[navigation_mask == True,:] = target.loadings loadings[navigation_mask == False,:] = np.nan target.loadings = loadings finally: #undo any pre-treatments self.undo_treatments() if self._unfolded4decomposition is True: self.fold() self._unfolded4decomposition is False
# Bootstrap the per-user HyperSpy configuration directory and copy the
# template configuration files into it on first run.
import os.path
import shutil
from hyperspy import messages

# NOTE(review): this list is empty, so the copy loop below is a no-op.
# The sibling version of this module populates it with
# ['hyperspyrc', 'edges_db.csv'] - confirm which is intended.
config_files = list()
# Package data directory that holds the configuration templates.
data_path = os.sep.join([os.path.dirname(__file__), '..', 'data'])

if os.name == 'posix':
    config_path = os.path.join(os.path.expanduser('~'), '.hyperspy')
    os_name = 'posix'
elif os.name in ['nt', 'dos']:
    # On Windows the config also goes in ~/.hyperspy; using APPDATA was
    # considered but is disabled.
    ## appdata = os.environ['APPDATA']
    config_path = os.path.expanduser('~/.hyperspy')
    # if os.path.isdir(appdata) is False:
    # os.mkdir(appdata)
    ## config_path = os.path.join(os.environ['APPDATA'], 'hyperspy')
    os_name = 'windows'
else:
    messages.warning_exit('Unsupported operating system: %s' % os.name)

# Create the user config directory if it does not exist yet.
if os.path.isdir(config_path) is False:
    messages.information("Creating config directory: %s" % config_path)
    os.mkdir(config_path)

# Copy each template file that the user does not already have; existing
# (possibly user-edited) files are never overwritten.
for file in config_files:
    templates_file = os.path.join(data_path, file)
    config_file = os.path.join(config_path, file)
    if os.path.isfile(config_file) is False:
        messages.information("Setting configuration file: %s" % file)
        shutil.copy(templates_file, config_file)
program_files = os.environ['PROGRAMFILES(X86)'] gos_path = os.path.join(program_files, gos) if os.path.isdir(gos_path) is False: gos_path = os.path.join(config_path, 'EELS_GOS') else: gos_path = os.path.join(config_path, 'EELS_GOS') return gos_path if os.path.isfile(defaults_file): # Remove config file if obsolated f = open(defaults_file) if 'Not really' in f.readline(): # It is the old config file f.close() messages.information('Removing obsoleted config file') os.remove(defaults_file) defaults_file_exists = False else: defaults_file_exists = True else: defaults_file_exists = False # Defaults template definition starts##################################### # This "section" is all that has to be modified to add or remove sections and # options from the defaults class GeneralConfig(t.HasTraits): default_file_format = t.Enum( 'hdf5',
# Bootstrap the per-user HyperSpy configuration directory and copy the
# template configuration files into it on first run.
import os.path
import shutil
from hyperspy import messages

# Template configuration files shipped in the package data directory.
config_files = ['hyperspyrc', 'edges_db.csv']
data_path = os.sep.join([os.path.dirname(__file__), '..', 'data'])

if os.name == 'posix':
    config_path = os.path.join(os.path.expanduser('~'), '.hyperspy')
    os_name = 'posix'
elif os.name in ['nt', 'dos']:
    # On Windows the configuration also lives in ~/.hyperspy (using
    # APPDATA was considered at some point but is not used).
    config_path = os.path.expanduser('~/.hyperspy')
    os_name = 'windows'
else:
    messages.warning_exit('Unsupported operating system: %s' % os.name)

# Create the user config directory if it does not exist yet.
if not os.path.isdir(config_path):
    messages.information("Creating config directory: %s" % config_path)
    os.mkdir(config_path)

# Copy each template file that the user does not already have; existing
# (possibly user-edited) files are never overwritten.
# (Loop variable renamed from `file`, which shadowed the builtin.)
for fname in config_files:
    templates_file = os.path.join(data_path, fname)
    config_file = os.path.join(config_path, fname)
    if not os.path.isfile(config_file):
        messages.information("Setting configuration file: %s" % fname)
        shutil.copy(templates_file, config_file)
program_files = os.environ["PROGRAMFILES(X86)"] gos_path = os.path.join(program_files, gos) if os.path.isdir(gos_path) is False: gos_path = os.path.join(config_path, "EELS_GOS") else: gos_path = os.path.join(config_path, "EELS_GOS") return gos_path if os.path.isfile(defaults_file): # Remove config file if obsolated f = open(defaults_file) if "Not really" in f.readline(): # It is the old config file f.close() messages.information("Removing obsoleted config file") os.remove(defaults_file) defaults_file_exists = False else: defaults_file_exists = True else: defaults_file_exists = False # Defaults template definition starts##################################### # This "section" is all that has to be modified to add or remove sections and # options from the defaults class GeneralConfig(t.HasTraits): default_file_format = t.Enum( "hdf5",
def multifit(self, mask=None, charge_only_fixed=False,
             autosave=False, autosave_every=10, **kwargs):
    """Fit the data to the model at all the positions of the
    navigation dimensions.

    Parameters
    ----------

    mask : {None, numpy.array}
        To mask (do not fit) at certain position pass a numpy.array
        of type bool where True indicates that the data will not be
        fitted at the given position.
    charge_only_fixed : bool
        If True, only the fixed parameters values will be updated
        when changing the positon.
        NOTE(review): this flag is not read anywhere in this method -
        presumably consumed by `fit`; confirm.
    autosave : bool
        If True, the result of the fit will be saved automatically
        with a frequency defined by autosave_every.
    autosave_every : int
        Save the result of fitting every given number of spectra.

    **kwargs : key word arguments
        Any extra key word argument will be passed to
        the fit method. See the fit method documentation for
        a list of valid arguments.

    See Also
    --------
    fit

    """
    if autosave is not False:
        # Reserve a unique .npz filename in the current directory; the
        # descriptor is closed immediately because only the name is
        # needed (save_parameters2file re-appends the extension).
        fd, autosave_fn = tempfile.mkstemp(
            prefix = 'hyperspy_autosave-', dir = '.', suffix = '.npz')
        os.close(fd)
        autosave_fn = autosave_fn[:-4]
        messages.information(
            "Autosaving each %s pixels to %s.npz" % (autosave_every,
                                                     autosave_fn))
        messages.information(
            "When multifit finishes its job the file will be deleted")
    if mask is not None and \
       (mask.shape != tuple(self.axes_manager.navigation_shape)):
        messages.warning_exit(
            "The mask must be a numpy array of boolean type with "
            " the same shape as the navigation: %s" %
            self.axes_manager.navigation_shape)
    masked_elements = 0 if mask is None else mask.sum()
    maxval = self.axes_manager.navigation_size - masked_elements
    if maxval > 0:
        pbar = progressbar.progressbar(maxval=maxval)
    if 'bounded' in kwargs and kwargs['bounded'] is True:
        # Translate the generic `bounded` flag into the fitter-specific
        # bounds setup; `get` avoids a KeyError when no fitter is given.
        if kwargs.get('fitter') == 'mpfit':
            self.set_mpfit_parameters_info()
            kwargs['bounded'] = None
        elif kwargs.get('fitter') in ("tnc", "l_bfgs_b"):
            self.set_boundaries()
            kwargs['bounded'] = None
        else:
            messages.information(
                "The chosen fitter does not support bounding."
                "If you require bounding please select one of the "
                "following fitters instead: mpfit, tnc, l_bfgs_b")
            kwargs['bounded'] = False
    i = 0
    for index in self.axes_manager:
        # Iterating the axes_manager moves the model to each navigation
        # position in turn.
        if mask is None or not mask[index]:
            self.fit(**kwargs)
            i += 1
            if maxval > 0:
                pbar.update(i)
        if autosave is True and i % autosave_every == 0:
            self.save_parameters2file(autosave_fn)
    if maxval > 0:
        pbar.finish()
    if autosave is True:
        # BUGFIX: the message previously read
        # 'Deleting the temporary file %s pixels' % (autosave_fn + 'npz')
        # - stray "pixels" and a missing dot in the extension.
        messages.information(
            'Deleting the temporary file %s' % (
                autosave_fn + '.npz'))
        os.remove(autosave_fn + '.npz')
def normalize_poissonian_noise(self, navigation_mask = None,
                               signal_mask = None, return_masks = False):
    """
    Scales the SI following Surf. Interface Anal. 2004; 36: 203-212
    to "normalize" the Poissonian data for PCA analysis.

    The scaling divides each element by sqrt(aG * bH), where aG and bH
    are the column and row sums of the (masked) data. The square roots
    are kept on the instance (_root_aG, _root_bH) so that decomposition
    results can later be rescaled back.

    Parameters
    ----------
    navigation_mask : boolean numpy array
        Navigation positions to include (None means all).
    signal_mask : boolean numpy array
        Signal channels to include (None means all).
    return_masks : bool
        If True, return the (possibly updated) masks; zero-sum rows and
        columns are removed from them here.

    Returns
    -------
    (navigation_mask, signal_mask) only when return_masks is True;
    masks that remained full slices are returned as None.
    """
    messages.information(
        "Scaling the data to normalize the (presumably) Poissonian noise")
    # If energy axis is not first, it needs to be for MVA.
    refold = self.unfold_if_multidim()
    # Work on a transposed copy: rows = signal channels,
    # columns = navigation positions.
    dc = self.data.T.squeeze().copy()
    navigation_mask = \
        self._correct_navigation_mask_when_unfolded(navigation_mask)
    # None masks become full slices so the same indexing code works.
    if navigation_mask is None:
        navigation_mask = slice(None)
    if signal_mask is None:
        signal_mask = slice(None)
    # Rescale the data to gaussianize the poissonian noise
    aG = dc[signal_mask,:][:,navigation_mask].sum(0).squeeze()
    bH = dc[signal_mask,:][:,navigation_mask].sum(1).squeeze()
    # Checks if any is negative (impossible for true Poissonian counts)
    if (aG < 0).any() or (bH < 0).any():
        messages.warning_exit(
            "Data error: negative values\n"
            "Are you sure that the data follow a poissonian "
            "distribution?")
    # Update the spatial and energy masks so they do not include rows
    # or columns that sum zero (these would cause division by zero).
    aG0 = (aG == 0)
    bH0 = (bH == 0)
    if aG0.any():
        if isinstance(navigation_mask, slice):
            # Convert the slice into a mask before setting its values
            navigation_mask = np.ones((self.data.shape[1]),dtype = 'bool')
        # Set columns summing zero as masked
        navigation_mask[aG0] = False
        aG = aG[aG0 == False]
    if bH0.any():
        if isinstance(signal_mask, slice):
            # Convert the slice into a mask before setting its values
            signal_mask = np.ones((self.data.shape[0]), dtype = 'bool')
        # Set rows summing zero as masked
        signal_mask[bH0] = False
        bH = bH[bH0 == False]
    # Keep the scaling factors for later un-scaling of the
    # decomposition results.
    self._root_aG = np.sqrt(aG)[np.newaxis,:]
    self._root_bH = np.sqrt(bH)[:, np.newaxis]
    temp = (dc[signal_mask,:][:,navigation_mask] /
            (self._root_aG * self._root_bH))
    if isinstance(signal_mask,slice) or isinstance(navigation_mask,slice):
        dc[signal_mask,navigation_mask] = temp
    else:
        # Two boolean masks: build the combined 2D mask explicitly,
        # since fancy-indexing with two 1D masks would not broadcast
        # as a rectangle.
        mask3D = signal_mask[:, np.newaxis] * \
            navigation_mask[np.newaxis, :]
        dc[mask3D] = temp.ravel()
    # TODO - dc was never modifying self.data - was normalization ever
    # really getting applied?  Comment next lines as necessary.
    self.data = dc.T.copy()
    # end normalization write to self.data.
    if refold is True:
        print "Automatically refolding the SI after scaling"
        self.fold()
    if return_masks is True:
        # Full slices mean "no mask"; report them back as None.
        if isinstance(navigation_mask, slice):
            navigation_mask = None
        if isinstance(signal_mask, slice):
            signal_mask = None
        return navigation_mask, signal_mask
def decomposition(self, normalize_poissonian_noise=False, algorithm = 'svd', output_dimension=None, centre = None, auto_transpose = True, navigation_mask=None, signal_mask=None, var_array=None, var_func=None, polyfit=None, on_peaks=False, reproject=None, **kwargs): """Decomposition with a choice of algorithms The results are stored in self.mva_results Parameters ---------- normalize_poissonian_noise : bool If True, scale the SI to normalize Poissonian noise algorithm : 'svd' | 'fast_svd' | 'mlpca' | 'fast_mlpca' | 'nmf' | 'sparse_pca' | 'mini_batch_sparse_pca' output_dimension : None or int number of components to keep/calculate centre : None | 'variables' | 'trials' If None no centring is applied. If 'variable' the centring will be performed in the variable axis. If 'trials', the centring will be performed in the 'trials' axis. It only has effect when using the svd or fast_svd algorithms auto_transpose : bool If True, automatically transposes the data to boost performance. Only has effect when using the svd of fast_svd algorithms. navigation_mask : boolean numpy array signal_mask : boolean numpy array var_array : numpy array Array of variance for the maximum likelihood PCA algorithm var_func : function or numpy array If function, it will apply it to the dataset to obtain the var_array. Alternatively, it can a an array with the coefficients of a polynomial. polyfit : reproject : None | signal | navigation | both If not None, the results of the decomposition will be projected in the selected masked area. See also -------- plot_decomposition_factors, plot_decomposition_scores, plot_lev """ # backup the original data if on_peaks: if hasattr(self.mapped_parameters,'peak_chars'): self._data_before_treatments = \ self.mapped_parameters.peak_chars.copy() else: print """No peak characteristics found. 
You must run the peak_char_stack function to obtain these before you can run PCA or ICA on them.""" else: self._data_before_treatments = self.data.copy() if algorithm == 'mlpca': if normalize_poissonian_noise is True: messages.warning( "It makes no sense to do normalize_poissonian_noise with " "the MLPCA algorithm. Therefore, " "normalize_poissonian_noise is set to False") normalize_poissonian_noise = False if output_dimension is None: messages.warning_exit("With the mlpca algorithm the " "output_dimension must be expecified") # Apply pre-treatments # Transform the data in a line spectrum self._unfolded4decomposition = self.unfold_if_multidim() if hasattr(navigation_mask, 'ravel'): navigation_mask = navigation_mask.ravel() if hasattr(signal_mask, 'ravel'): signal_mask = signal_mask.ravel() # Normalize the poissonian noise # TODO this function can change the masks and this can cause # problems when reprojecting if normalize_poissonian_noise is True: if reproject is None: navigation_mask, signal_mask = \ self.normalize_poissonian_noise( navigation_mask=navigation_mask, signal_mask=signal_mask, return_masks = True) elif reproject == 'both': _, _ = \ self.normalize_poissonian_noise(return_masks = True) elif reproject == 'navigation': _, signal_mask = \ self.normalize_poissonian_noise(return_masks = True, signal_mask=signal_mask,) elif reproject == 'signal': navigation_mask, _ = \ self.normalize_poissonian_noise(return_masks = True, navigation_mask=navigation_mask,) messages.information('Performing decomposition analysis') if on_peaks: dc = self.mapped_parameters.peak_chars else: # The data must be transposed both for Images and Spectra dc = self.data #set the output target (peak results or not?) 
target = self._get_target(on_peaks) # Transform the None masks in slices to get the right behaviour if navigation_mask is None: navigation_mask = slice(None) if signal_mask is None: signal_mask = slice(None) # Reset the explained_variance which is not set by all the algorithms explained_variance = None explained_variance_ratio = None mean = None if algorithm == 'svd': factors, scores, explained_variance, mean = svd_pca( dc[:,signal_mask][navigation_mask,:], centre = centre, auto_transpose = auto_transpose) elif algorithm == 'fast_svd': factors, scores, explained_variance, mean = svd_pca( dc[:,signal_mask][navigation_mask,:], fast = True, output_dimension = output_dimension, centre = centre, auto_transpose = auto_transpose) elif algorithm == 'sklearn_pca': sk = sklearn.decomposition.PCA(**kwargs) sk.n_components = output_dimension scores = sk.fit_transform((dc[:,signal_mask][navigation_mask,:])) factors = sk.components_.T explained_variance = sk.explained_variance_ mean = sk.mean_ centre = 'trials' elif algorithm == 'nmf': sk = sklearn.decomposition.NMF(**kwargs) sk.n_components = output_dimension scores = sk.fit_transform((dc[:,signal_mask][navigation_mask,:])) factors = sk.components_.T elif algorithm == 'sparse_pca': sk = sklearn.decomposition.SparsePCA(output_dimension, **kwargs) scores = sk.fit_transform(dc[:,signal_mask][navigation_mask,:]) factors = sk.components_.T elif algorithm == 'mini_batch_sparse_pca': sk = sklearn.decomposition.MiniBatchSparsePCA(output_dimension, **kwargs) scores = sk.fit_transform(dc[:,signal_mask][navigation_mask,:]) factors = sk.components_.T elif algorithm == 'mlpca' or algorithm == 'fast_mlpca': print "Performing the MLPCA training" if output_dimension is None: messages.warning_exit( "For MLPCA it is mandatory to define the output_dimension") if var_array is None and var_func is None: messages.information('No variance array provided.' 
'Supposing poissonian data') var_array = dc[:,signal_mask][navigation_mask,:] if var_array is not None and var_func is not None: messages.warning_exit( "You have defined both the var_func and var_array keywords" "Please, define just one of them") if var_func is not None: if hasattr(var_func, '__call__'): var_array = var_func(dc[signal_mask,...][:,navigation_mask]) else: try: var_array = np.polyval(polyfit,dc[signal_mask, navigation_mask]) except: messages.warning_exit( 'var_func must be either a function or an array' 'defining the coefficients of a polynom') if algorithm == 'mlpca': fast = False else: fast = True U,S,V,Sobj, ErrFlag = mlpca( dc[:,signal_mask][navigation_mask,:], var_array, output_dimension, fast = fast) scores = U * S factors = V explained_variance_ratio = S ** 2 / Sobj explained_variance = S ** 2 / len(factors) else: messages.information('Error: Algorithm not recognised. ' 'Nothing done') return False # We must calculate the ratio here because otherwise the sum information # can be lost if the user call crop_decomposition_dimension if explained_variance is not None and explained_variance_ratio is None: explained_variance_ratio = \ explained_variance / explained_variance.sum() # Store the results in mva_results target.factors = factors target.scores = scores target.explained_variance = explained_variance target.explained_variance_ratio = explained_variance_ratio target.decomposition_algorithm = algorithm target.poissonian_noise_normalized = \ normalize_poissonian_noise target.output_dimension = output_dimension target.unfolded = self._unfolded4decomposition target.centre = centre target.mean = mean if output_dimension and factors.shape[1] != output_dimension: target.crop_decomposition_dimension(output_dimension) # Delete the unmixing information, because it'll refer to a previous # decompositions target.unmixing_matrix = None target.ica_algorithm = None if self._unfolded4decomposition is True: target.original_shape = self._shape_before_unfolding # 
Reproject if mean is None: mean = 0 if reproject in ('navigation', 'both'): if algorithm not in ('nmf', 'sparse_pca', 'mini_batch_sparse_pca'): scores_ = np.dot(dc[:,signal_mask] - mean, factors) else: scores_ = sk.transform(dc[:,signal_mask]) target.scores = scores_ if reproject in ('signal', 'both'): if algorithm not in ('nmf', 'sparse_pca', 'mini_batch_sparse_pca'): factors = np.dot(np.linalg.pinv(scores), dc[navigation_mask,:] - mean).T target.factors = factors else: messages.information("Reprojecting the signal is not yet " "supported for this algorithm") if reproject == 'both': reproject = 'signal' else: reproject = None # Rescale the results if the noise was normalized if normalize_poissonian_noise is True: target.factors[:] *= self._root_bH.T target.scores[:] *= self._root_aG # Set the pixels that were not processed to nan if not isinstance(signal_mask, slice): target.signal_mask = signal_mask if reproject not in ('both', 'signal'): factors = np.zeros((dc.shape[-1], target.factors.shape[1])) factors[signal_mask == True,:] = target.factors factors[signal_mask == False,:] = np.nan target.factors = factors if not isinstance(navigation_mask, slice): target.navigation_mask = navigation_mask if reproject not in ('both', 'navigation'): scores = np.zeros((dc.shape[0], target.scores.shape[1])) scores[navigation_mask == True,:] = target.scores scores[navigation_mask == False,:] = np.nan target.scores = scores #undo any pre-treatments self.undo_treatments(on_peaks) if self._unfolded4decomposition is True: self.fold() self._unfolded4decomposition is False
ripple, tiff) io_plugins = [msa, digital_micrograph, fei, mrc, ripple, tiff] try: from hyperspy.io_plugins import netcdf io_plugins.append(netcdf) except ImportError: pass # NetCDF is obsolate and is only provided for users who have # old EELSLab files. Therefore, we print no message if it is not # available #~ messages.information('The NetCDF IO features are not available') try: from hyperspy.io_plugins import hdf5 io_plugins.append(hdf5) except ImportError: messages.warning('The HDF5 IO features are not available. ' 'It is highly reccomended to install h5py') try: from hyperspy.io_plugins import image io_plugins.append(image) except ImportError: messages.information('The Image (PIL) IO features are not available') default_write_ext = set() for plugin in io_plugins: if plugin.writes: default_write_ext.add( plugin.file_extensions[plugin.default_extension])