Example #1
def overwrite(fname):
    """ If file exists 'fname', ask for overwriting and return True or False,
    else return True.

    """
    if os.path.isfile(fname):
        message = "Overwrite '%s' (y/n)?\n" % fname
        try:
            answer = raw_input(message).lower()
            while answer not in ('y', 'n'):
                print('Please answer y or n.')
                answer = raw_input(message).lower()
            if answer == 'y':
                return True
            elif answer == 'n':
                # print('Operation canceled.')
                return False
        except Exception:
            # We are running in the IPython notebook that does not
            # support raw_input
            information("Your terminal does not support raw input. "
                        "Not overwriting. "
                        "To overwrite the file use `overwrite=True`")
            return False
    else:
        return True
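
A minimal usage sketch for the function above, guarding a save operation. The
filename and payload below are hypothetical; `overwrite` is assumed to be
importable from the module that defines it:

fname = 'results.txt'  # hypothetical output file
if overwrite(fname):
    with open(fname, 'w') as f:
        f.write('data')  # write the real payload here
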
Example #2
 def __init__(self, element_subshell, GOS=None):
     # Declare the parameters
     Component.__init__(self, ["intensity", "fine_structure_coeff", "effective_angle", "onset_energy"])
     self.name = element_subshell
     self.element, self.subshell = element_subshell.split("_")
     self.energy_scale = None
     self.effective_angle.free = False
     self.fine_structure_active = preferences.EELS.fine_structure_active
     self.fine_structure_width = preferences.EELS.fine_structure_width
     self.fine_structure_coeff.ext_force_positive = False
     self.GOS = None
     # Set initial actions
     if GOS is None:
         try:
             self.GOS = HartreeSlaterGOS(element_subshell)
             GOS = "Hartree-Slater"
         except IOError:
             GOS = "hydrogenic"
             messages.information("Hartree-Slater GOS not available" "Using hydrogenic GOS")
     if self.GOS is None:
         if GOS == "Hartree-Slater":
             self.GOS = HartreeSlaterGOS(element_subshell)
         elif GOS == "hydrogenic":
             self.GOS = HydrogenicGOS(element_subshell)
         else:
             raise ValueError("gos must be one of: None, 'hydrogenic'" " or 'Hartree-Slater'")
     self.onset_energy.value = self.GOS.onset_energy
     self.onset_energy.free = False
     self._position = self.onset_energy
     self.free_onset_energy = False
     self.intensity.grad = self.grad_intensity
     self.intensity.value = 1
     self.intensity.bmin = 0.0
     self.intensity.bmax = None
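
A hedged instantiation sketch for the component above. The class name
`EELSCLEdge` and the 'Ti_L3' element_subshell string are assumptions based on
HyperSpy conventions and do not appear in the snippet itself:

# Build a Ti L3 ionization-edge component; the constructor falls back to
# the hydrogenic GOS when the Hartree-Slater tables are not installed.
edge = EELSCLEdge('Ti_L3')
edge.intensity.value = 100.0        # starting guess for the fitted intensity
edge.fine_structure_active = False  # disable fine-structure modelling
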
Example #3
def load_with_reader(filename, reader, record_by=None, signal=None, **kwds):
    from hyperspy.signals.image import Image
    from hyperspy.signals.spectrum import Spectrum
    from hyperspy.signals.eels import EELSSpectrum
    messages.information(reader.description)
    file_data_list = reader.file_reader(filename, record_by=record_by, **kwds)
    objects = []
    for file_data_dict in file_data_list:
        if record_by is not None:
            file_data_dict['mapped_parameters']['record_by'] = record_by
        # The record_by can still be None if it was not defined by the reader
        if file_data_dict['mapped_parameters']['record_by'] is None:
            print "No data type provided.  Defaulting to image."
            file_data_dict['mapped_parameters']['record_by'] = 'image'

        if signal is not None:
            file_data_dict['mapped_parameters']['signal'] = signal

        if file_data_dict['mapped_parameters']['record_by'] == 'image':
            s = Image(file_data_dict)
        else:
            if file_data_dict['mapped_parameters']['signal'] == 'EELS':
                s = EELSSpectrum(file_data_dict)
            else:
                s = Spectrum(file_data_dict)
        if defaults.plot_on_load is True:
            s.plot()
        objects.append(s)

    if len(objects) == 1:
        objects = objects[0]
    return objects
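
A minimal call sketch for the function above. The `msa` reader module path is
an assumption; any module exposing a `file_reader` function and a
`description` attribute would do:

from hyperspy.io_plugins import msa  # assumed reader module

s = load_with_reader('spectrum.msa', msa, signal='EELS')
s.plot()
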
Example #4
def load_with_reader(filename, reader, record_by = None, signal = None,
                     **kwds):
    from hyperspy.signals.image import Image
    from hyperspy.signals.spectrum import Spectrum
    from hyperspy.signals.eels import EELSSpectrum
    messages.information(reader.description)
    file_data_list = reader.file_reader(filename,
                                        record_by=record_by,
                                        **kwds)
    objects = []
    for file_data_dict in file_data_list:
        if record_by is not None:
            file_data_dict['mapped_parameters']['record_by'] = record_by
        # The record_by can still be None if it was not defined by the reader
        if file_data_dict['mapped_parameters']['record_by'] is None:
            print "No data type provided.  Defaulting to image."
            file_data_dict['mapped_parameters']['record_by'] = 'image'

        if signal is not None:
            file_data_dict['mapped_parameters']['signal'] = signal

        if file_data_dict['mapped_parameters']['record_by'] == 'image':
            s = Image(file_data_dict)
        else:
            if file_data_dict['mapped_parameters']['signal'] == 'EELS':
                s = EELSSpectrum(file_data_dict)
            else:
                s = Spectrum(file_data_dict)
        if defaults.plot_on_load is True:
            s.plot()
        objects.append(s)

    if len(objects) == 1:
        objects = objects[0]
    return objects
Example #5
    def plot_explained_variance_ratio(self, n=50, log=True, on_peaks=False,
                                      ax=None, label=None):
        """Plot the decomposition explained variance ratio vs index number

        Parameters
        ----------
        n : int
            Number of components
        log : bool
            If True, the y axis uses a log scale
        ax : matplotlib.axes instance
            The axes where to plot the figures. If None, a new figure will be
            created
        label: str
            An optional label for the legend
        """
        target = self._get_target(on_peaks)
        if target.explained_variance_ratio is None:
            messages.information(
                'No explained variance ratio information available')
            return 0
        if n > target.explained_variance_ratio.shape[0]:
            n = target.explained_variance_ratio.shape[0]
        if ax is None:
            fig = plt.figure()
            ax = fig.add_subplot(111)
        ax.plot(range(n), target.explained_variance_ratio[:n], 'o', label=label)
        if log is True:
            ax.semilogy()
        ax.set_ylabel('Explained variance ratio')
        ax.set_xlabel('Principal component index')
        plt.legend()
        plt.show()
        return ax
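
A short usage sketch, where `s` and `s2` are hypothetical signals on which a
decomposition has already been run:

ax = s.plot_explained_variance_ratio(n=30, label='full dataset')
# Pass the returned axes back in to overlay a second scree plot:
s2.plot_explained_variance_ratio(n=30, ax=ax, label='masked dataset')
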
Example #6
    def __init__(self, element_subshell, GOS=None):
        # Declare the parameters
        Component.__init__(self, [
            'intensity', 'fine_structure_coeff', 'effective_angle',
            'onset_energy'
        ])
        if isinstance(element_subshell, dict):
            self.element = element_subshell['element']
            self.subshell = element_subshell['subshell']
        else:
            self.element, self.subshell = element_subshell.split('_')
        self.name = "_".join([self.element, self.subshell])
        self.energy_scale = None
        self.effective_angle.free = False
        self.fine_structure_active = preferences.EELS.fine_structure_active
        self.fine_structure_width = preferences.EELS.fine_structure_width
        self.fine_structure_coeff.ext_force_positive = False
        self.GOS = None
        # Set initial actions
        if GOS is None:
            try:
                self.GOS = HartreeSlaterGOS(element_subshell)
                GOS = 'Hartree-Slater'
            except IOError:
                GOS = 'hydrogenic'
                messages.information('Hartree-Slater GOS not available. '
                                     'Using hydrogenic GOS')
        if self.GOS is None:
            if GOS == 'Hartree-Slater':
                self.GOS = HartreeSlaterGOS(element_subshell)
            elif GOS == 'hydrogenic':
                self.GOS = HydrogenicGOS(element_subshell)
            else:
                raise ValueError('gos must be one of: None, \'hydrogenic\''
                                 ' or \'Hartree-Slater\'')
        self.onset_energy.value = self.GOS.onset_energy
        self.onset_energy.free = False
        self._position = self.onset_energy
        self.free_onset_energy = False
        self.intensity.grad = self.grad_intensity
        self.intensity.value = 1
        self.intensity.bmin = 0.
        self.intensity.bmax = None

        self._whitelist['GOS'] = ('init', GOS)
        if GOS == 'Hartree-Slater':
            self._whitelist['element_subshell'] = (
                'init', self.GOS.as_dictionary(True))
        elif GOS == 'hydrogenic':
            self._whitelist['element_subshell'] = ('init', element_subshell)
        self._whitelist['fine_structure_active'] = None
        self._whitelist['fine_structure_width'] = None
        self._whitelist['fine_structure_smoothing'] = None
        self.effective_angle.events.value_changed.connect(
            self._integrate_GOS, [])
        self.onset_energy.events.value_changed.connect(self._integrate_GOS, [])
        self.onset_energy.events.value_changed.connect(self._calculate_knots,
                                                       [])
Example #7
    def __init__(self, element_subshell, GOS=None):
        # Declare the parameters
        Component.__init__(self,
                           ['intensity',
                            'fine_structure_coeff',
                            'effective_angle',
                            'onset_energy'])
        if isinstance(element_subshell, dict):
            self.element = element_subshell['element']
            self.subshell = element_subshell['subshell']
        else:
            self.element, self.subshell = element_subshell.split('_')
        self.name = "_".join([self.element, self.subshell])
        self.energy_scale = None
        self.effective_angle.free = False
        self.fine_structure_active = preferences.EELS.fine_structure_active
        self.fine_structure_width = preferences.EELS.fine_structure_width
        self.fine_structure_coeff.ext_force_positive = False
        self.GOS = None
        # Set initial actions
        if GOS is None:
            try:
                self.GOS = HartreeSlaterGOS(element_subshell)
                GOS = 'Hartree-Slater'
            except IOError:
                GOS = 'hydrogenic'
                messages.information(
                    'Hartree-Slater GOS not available. '
                    'Using hydrogenic GOS')
        if self.GOS is None:
            if GOS == 'Hartree-Slater':
                self.GOS = HartreeSlaterGOS(element_subshell)
            elif GOS == 'hydrogenic':
                self.GOS = HydrogenicGOS(element_subshell)
            else:
                raise ValueError(
                    'gos must be one of: None, \'hydrogenic\''
                    ' or \'Hartree-Slater\'')
        self.onset_energy.value = self.GOS.onset_energy
        self.onset_energy.free = False
        self._position = self.onset_energy
        self.free_onset_energy = False
        self.intensity.grad = self.grad_intensity
        self.intensity.value = 1
        self.intensity.bmin = 0.
        self.intensity.bmax = None

        self._whitelist['GOS'] = ('init', GOS)
        if GOS == 'Hartree-Slater':
            self._whitelist['element_subshell'] = (
                'init',
                self.GOS.as_dictionary(True))
        elif GOS == 'hydrogenic':
            self._whitelist['element_subshell'] = ('init', element_subshell)
        self._whitelist['fine_structure_active'] = None
        self._whitelist['fine_structure_width'] = None
        self._whitelist['fine_structure_smoothing'] = None
Example #8
    def normalize_poissonian_noise(self, navigation_mask=None,
                                   signal_mask=None):
        """
        Scales the SI following Surf. Interface Anal. 2004; 36: 203–212
        to "normalize" the poissonian data for decomposition analysis

        Parameters
        ----------
        navigation_mask : boolean numpy array
        signal_mask : boolean numpy array
        """
        messages.information(
            "Scaling the data to normalize the (presumably)"
            " Poissonian noise")
        refold = self.unfold_if_multidim()
        # The rest of the code assumes that the first data axis
        # is the navigation axis. We transpose the data if that is not the
        # case.
        dc = (self.data if self.axes_manager[0].index_in_array == 0
              else self.data.T)
        if navigation_mask is None:
            navigation_mask = slice(None)
        else:
            navigation_mask = ~navigation_mask.ravel()
        if signal_mask is None:
            signal_mask = slice(None)
        else:
            signal_mask = ~signal_mask
        # Rescale the data to gaussianize the poissonian noise
        aG = dc[:, signal_mask][navigation_mask, :].sum(1).squeeze()
        bH = dc[:, signal_mask][navigation_mask, :].sum(0).squeeze()
        # Checks if any is negative
        if (aG < 0).any() or (bH < 0).any():
            raise ValueError(
                "Data error: negative values\n"
                "Are you sure that the data follow a poissonian "
                "distribution?")

        self._root_aG = np.sqrt(aG)[:, np.newaxis]
        self._root_bH = np.sqrt(bH)[np.newaxis, :]
        # Temporarily disable numpy's warning when the result of an
        # operation produces nans (0/0 below)
        old_err = np.seterr(invalid='ignore')
        dc[:, signal_mask][navigation_mask, :] /= (self._root_aG *
                                                   self._root_bH)
        # Restore the previous numpy error settings
        np.seterr(**old_err)
        # Set the nans resulting from 0/0 to zero
        dc[:, signal_mask][navigation_mask, :] = \
            np.nan_to_num(dc[:, signal_mask][navigation_mask, :])

        if refold is True:
            print "Automatically refolding the SI after scaling"
            self.fold()
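
In the notation of the code above, with $d$ the unfolded data matrix
(navigation index $i$, signal channel $j$), the scaling it implements is

$$\tilde{d}_{ij} = \frac{d_{ij}}{\sqrt{a_i}\,\sqrt{b_j}}, \qquad
a_i = \sum_j d_{ij}\ (\text{aG}), \qquad b_j = \sum_i d_{ij}\ (\text{bH}),$$

which makes the variance of the (presumably Poissonian) counts approximately
uniform across the matrix before the decomposition is computed.
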
Example #9
    def normalize_poissonian_noise(self, navigation_mask=None,
                                   signal_mask=None):
        """
        Scales the SI following Surf. Interface Anal. 2004; 36: 203–212
        to "normalize" the poissonian data for decomposition analysis

        Parameters
        ----------
        navigation_mask : boolean numpy array
        signal_mask : boolean numpy array
        """
        messages.information(
            "Scaling the data to normalize the (presumably)"
            " Poissonian noise")
        refold = self.unfold()
        # The rest of the code assumes that the first data axis
        # is the navigation axis. We transpose the data if that is not the
        # case.
        dc = (self.data if self.axes_manager[0].index_in_array == 0
              else self.data.T)
        if navigation_mask is None:
            navigation_mask = slice(None)
        else:
            navigation_mask = ~navigation_mask.ravel()
        if signal_mask is None:
            signal_mask = slice(None)
        else:
            signal_mask = ~signal_mask
        # Rescale the data to gaussianize the poissonian noise
        aG = dc[:, signal_mask][navigation_mask, :].sum(1).squeeze()
        bH = dc[:, signal_mask][navigation_mask, :].sum(0).squeeze()
        # Checks if any is negative
        if (aG < 0).any() or (bH < 0).any():
            raise ValueError(
                "Data error: negative values\n"
                "Are you sure that the data follow a poissonian "
                "distribution?")

        self._root_aG = np.sqrt(aG)[:, np.newaxis]
        self._root_bH = np.sqrt(bH)[np.newaxis, :]
        # Temporarily disable numpy's warning when the result of an
        # operation produces nans (0/0 below)
        old_err = np.seterr(invalid='ignore')
        dc[:, signal_mask][navigation_mask, :] /= (self._root_aG *
                                                   self._root_bH)
        # Restore the previous numpy error settings
        np.seterr(**old_err)
        # Set the nans resulting from 0/0 to zero
        dc[:, signal_mask][navigation_mask, :] = \
            np.nan_to_num(dc[:, signal_mask][navigation_mask, :])

        if refold is True:
            print "Automatically refolding the SI after scaling"
            self.fold()
Example #10
 def unfold_signal_space(self):
     """Modify the shape of the data to obtain a signal space of
     dimension 1
     """
     if self.axes_manager.signal_dimension < 2:
         messages.information('Nothing done, the signal dimension was '
                              'already 1')
         return False
     steady_axes = [
         axis.index_in_array for axis in self.axes_manager._non_slicing_axes
     ]
     unfolded_axis = self.axes_manager._slicing_axes[-1].index_in_array
     self._unfold(steady_axes, unfolded_axis)
Example #11
 def unfold_signal_space(self):
     """Modify the shape of the data to obtain a signal space of
     dimension 1
     """
     if self.axes_manager.signal_dimension < 2:
         messages.information('Nothing done, the signal dimension was '
                             'already 1')
         return False
     steady_axes = [axis.index_in_array for axis in
                    self.axes_manager._non_slicing_axes]
     unfolded_axis = self.axes_manager._slicing_axes[-1].index_in_array
     self._unfold(steady_axes, unfolded_axis)
Example #12
    def __init__(self, element_subshell, GOS=None):
        # Declare the parameters
        Component.__init__(self, ["intensity", "fine_structure_coeff", "effective_angle", "onset_energy"])
        if isinstance(element_subshell, dict):
            self.element = element_subshell["element"]
            self.subshell = element_subshell["subshell"]
        else:
            self.element, self.subshell = element_subshell.split("_")
        self.name = "_".join([self.element, self.subshell])
        self.energy_scale = None
        self.effective_angle.free = False
        self.fine_structure_active = preferences.EELS.fine_structure_active
        self.fine_structure_width = preferences.EELS.fine_structure_width
        self.fine_structure_coeff.ext_force_positive = False
        self.GOS = None
        # Set initial actions
        if GOS is None:
            try:
                self.GOS = HartreeSlaterGOS(element_subshell)
                GOS = "Hartree-Slater"
            except IOError:
                GOS = "hydrogenic"
                messages.information("Hartree-Slater GOS not available. " "Using hydrogenic GOS")
        if self.GOS is None:
            if GOS == "Hartree-Slater":
                self.GOS = HartreeSlaterGOS(element_subshell)
            elif GOS == "hydrogenic":
                self.GOS = HydrogenicGOS(element_subshell)
            else:
                raise ValueError("gos must be one of: None, 'hydrogenic'" " or 'Hartree-Slater'")
        self.onset_energy.value = self.GOS.onset_energy
        self.onset_energy.free = False
        self._position = self.onset_energy
        self.free_onset_energy = False
        self.intensity.grad = self.grad_intensity
        self.intensity.value = 1
        self.intensity.bmin = 0.0
        self.intensity.bmax = None

        self._whitelist["GOS"] = ("init", GOS)
        if GOS == "Hartree-Slater":
            self._whitelist["element_subshell"] = ("init", self.GOS.as_dictionary(True))
        elif GOS == "hydrogenic":
            self._whitelist["element_subshell"] = ("init", element_subshell)
        self._whitelist["fine_structure_active"] = None
        self._whitelist["fine_structure_width"] = None
        self._whitelist["fine_structure_smoothing"] = None
        self.effective_angle.events.value_changed.connect(self._integrate_GOS, [])
        self.onset_energy.events.value_changed.connect(self._integrate_GOS, [])
        self.onset_energy.events.value_changed.connect(self._calculate_knots, [])
Example #13
def load_with_reader(filename, reader, record_by=None,
        signal_type=None, output_level=1, **kwds):
    from hyperspy.signals.image import Image
    from hyperspy.signals.spectrum import Spectrum
    from hyperspy.signals.eels import EELSSpectrum
    if output_level>1:
        messages.information('Loading %s ...' % filename)
    
    file_data_list = reader.file_reader(filename,
                                        record_by=record_by,
                                        output_level=output_level,
                                        **kwds)
    objects = []
    for file_data_dict in file_data_list:
        if record_by is not None:
            file_data_dict['mapped_parameters']['record_by'] = record_by
        # The record_by can still be None if it was not defined by the reader
        if file_data_dict['mapped_parameters']['record_by'] is None:
            print "No data type provided.  Defaulting to image."
            file_data_dict['mapped_parameters']['record_by'] = 'image'

        if signal_type is not None:
            file_data_dict['mapped_parameters']['signal_type'] = signal_type

        if file_data_dict['mapped_parameters']['record_by'] == 'image':
            s = Image(file_data_dict)
        else:
            if ('signal_type' in file_data_dict['mapped_parameters'] 
                and file_data_dict['mapped_parameters']['signal_type'] 
                == 'EELS'):
                s = EELSSpectrum(file_data_dict)
            else:
                s = Spectrum(file_data_dict)
        folder, filename = os.path.split(os.path.abspath(filename))
        filename, extension = os.path.splitext(filename)
        s.tmp_parameters.folder = folder
        s.tmp_parameters.filename = filename
        s.tmp_parameters.extension = extension.replace('.','')
        objects.append(s)
        s.print_summary()

    if len(objects) == 1:
        objects = objects[0]
    if output_level>1:
        messages.information('%s correctly loaded' % filename)
    return objects
Example #14
 def multifit(self, mask = None, fitter = None, 
              charge_only_fixed = False, grad = False, autosave = False, 
              autosave_every = 10, bounded = False, **kwargs):
     
     if fitter is None:
         fitter = preferences.Model.default_fitter
         print('Fitter: %s' % fitter) 
     if autosave is not False:
         fd, autosave_fn = tempfile.mkstemp(prefix = 'hyperspy_autosave-', 
         dir = '.', suffix = '.npz')
         os.close(fd)
         autosave_fn = autosave_fn[:-4]
         messages.information(
         "Autosaving each %s pixels to %s.npz" % (autosave_every, 
                                                  autosave_fn))
         messages.information(
         "When multifit finishes its job the file will be deleted")
     if mask is not None and \
     (mask.shape != tuple(self.axes_manager.navigation_shape)):
        messages.warning_exit(
        "The mask must be an array with the same espatial dimensions as the" 
        "navigation shape, %s" % self.axes_manager.navigation_shape)
     masked_elements = 0 if mask is None else mask.sum()
     pbar = progressbar.progressbar(
     maxval = (np.cumprod(self.axes_manager.navigation_shape)[-1] - 
     masked_elements))
     if bounded is True:
         if fitter == 'mpfit':
             self.set_mpfit_parameters_info()
             bounded = None
         elif fitter in ("tnc", "l_bfgs_b"):
             self.set_boundaries()
             bounded = None
         else:
             messages.information(
             "The chosen fitter does not suppport bounding."
             "If you require boundinig please select one of the following"
             "fitters instead: mpfit, tnc, l_bfgs_b")
             bounded = False
     i = 0
     for index in np.ndindex(tuple(self.axes_manager.navigation_shape)):
         if mask is None or not mask[index]:
             self.axes_manager.set_not_slicing_indexes(index)
             self.charge(only_fixed = charge_only_fixed)
             self.fit(fitter = fitter, grad = grad, bounded = bounded, 
                      **kwargs)
             i += 1
             pbar.update(i)
         if autosave is True and i % autosave_every  == 0:
             self.save_parameters2file(autosave_fn)
     pbar.finish()
     if autosave is True:
         messages.information(
         'Deleting the temporary file %s' % (autosave_fn + '.npz'))
         os.remove(autosave_fn + '.npz')
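
A hedged call sketch for the method above, assuming `m` is an instance of the
model class that defines it and that the mpfit optimizer is available:

m.multifit(fitter='mpfit', bounded=True,      # honour parameter bounds
           autosave=True, autosave_every=25)  # checkpoint every 25 pixels
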
Example #15
 def __init__(self, element_subshell, GOS=None):
     # Declare the parameters
     Component.__init__(self,
         ['intensity',
          'fine_structure_coeff',
          'effective_angle',
          'onset_energy'])
     self.name = element_subshell
     self.element, self.subshell = element_subshell.split('_')
     self.energy_scale = None
     self.effective_angle.free = False
     self.fine_structure_active = preferences.EELS.fine_structure_active
     self.fine_structure_width = preferences.EELS.fine_structure_width
     self.fine_structure_coeff.ext_force_positive = False
     self.GOS = None
     # Set initial actions
     if GOS is None:
         try:
             self.GOS = HartreeSlaterGOS(element_subshell)
             GOS = 'Hartree-Slater'
         except IOError:
             GOS = 'hydrogenic'
             messages.information(
                  'Hartree-Slater GOS not available. '
                  'Using hydrogenic GOS')
     if self.GOS is None:
         if GOS=='Hartree-Slater':
             self.GOS = HartreeSlaterGOS(element_subshell)
         elif GOS == 'hydrogenic':
             self.GOS = HydrogenicGOS(element_subshell)
         else:
             raise ValueError(
                 'gos must be one of: None, \'hydrogenic\''
                               ' or \'Hartree-Slater\'')
     self.onset_energy.value = self.GOS.onset_energy
     self.onset_energy.free = False
     self._position = self.onset_energy
     self.free_onset_energy = False        
     self.intensity.grad = self.grad_intensity
     self.intensity.value = 1
     self.intensity.bmin = 0.
     self.intensity.bmax = None
Example #16
    def plot_explained_variance_ratio(self, n=50, log=True,
                                      ax=None, label=None):
        """Plot the decomposition explained variance ratio vs index number

        Parameters
        ----------
        n : int
            Number of components
        log : bool
            If True, the y axis uses a log scale
        ax : matplotlib.axes instance
            The axes where to plot the figures. If None, a new figure will be
            created
        label: str
            An optional label for the legend
            
        Returns
        -------
        The axes of the plot, which can be passed to the method again in
        a future call using the ax argument
        
        """
        target = self.learning_results
        if target.explained_variance_ratio is None:
            messages.information(
                'No explained variance ratio information available')
            return 0
        if n > target.explained_variance_ratio.shape[0]:
            n = target.explained_variance_ratio.shape[0]
        if ax is None:
            fig = plt.figure()
            ax = fig.add_subplot(111)
        ax.plot(range(n), target.explained_variance_ratio[:n], 'o',
            label=label)
        if log is True:
            ax.semilogy()
        ax.set_ylabel('Explained variance ratio')
        ax.set_xlabel('Principal component index')
        plt.legend()
        plt.show()
        return ax
Example #17
File: io.py Project: csb60/hyperspy
def load_with_reader(filename, reader, record_by = None, signal_type = None,
                     output_level=1, **kwds):
    from hyperspy.signals.image import Image
    from hyperspy.signals.spectrum import Spectrum
    from hyperspy.signals.eels import EELSSpectrum
    if output_level>1:
        messages.information('Loading %s ...' % filename)
    
    file_data_list = reader.file_reader(filename,
                                        record_by=record_by,
                                        output_level=output_level,
                                        **kwds)
    objects = []
    for file_data_dict in file_data_list:
        if record_by is not None:
            file_data_dict['mapped_parameters']['record_by'] = record_by
        # The record_by can still be None if it was not defined by the reader
        if file_data_dict['mapped_parameters']['record_by'] is None:
            print "No data type provided.  Defaulting to image."
            file_data_dict['mapped_parameters']['record_by'] = 'image'

        if signal_type is not None:
            file_data_dict['mapped_parameters']['signal_type'] = signal_type

        if file_data_dict['mapped_parameters']['record_by'] == 'image':
            s = Image(file_data_dict)
        else:
            if 'signal_type' in file_data_dict['mapped_parameters'] and \
                file_data_dict['mapped_parameters']['signal_type'] == 'EELS':
                s = EELSSpectrum(file_data_dict)
            else:
                s = Spectrum(file_data_dict)
        objects.append(s)
        print s

    if len(objects) == 1:
        objects = objects[0]
    if output_level>1:
        messages.information('%s correctly loaded' % filename)
    return objects
Example #18
def load_with_reader(filename, reader, record_by = None, signal_type = None,
                     output_level=1, is_agg = False, **kwds):
    from hyperspy.signals.image import Image
    from hyperspy.signals.spectrum import Spectrum
    from hyperspy.signals.eels import EELSSpectrum
    if output_level>1:
        messages.information(reader.description)
    file_data_list = reader.file_reader(filename,
                                        record_by=record_by,
                                        output_level=output_level,
                                        **kwds)
    objects = []
    for file_data_dict in file_data_list:
        if record_by is not None:
            file_data_dict['mapped_parameters']['record_by'] = record_by
        # The record_by can still be None if it was not defined by the reader
        if file_data_dict['mapped_parameters']['record_by'] is None:
            print "No data type provided.  Defaulting to image."
            file_data_dict['mapped_parameters']['record_by'] = 'image'

        if signal_type is not None:
            file_data_dict['mapped_parameters']['signal_type'] = signal_type

        if file_data_dict['mapped_parameters']['record_by'] == 'image':
            s = Image(file_data_dict)
        else:
            if 'signal_type' in file_data_dict['mapped_parameters'] and \
                file_data_dict['mapped_parameters']['signal_type'] == 'EELS':
                s = EELSSpectrum(file_data_dict)
            else:
                s = Spectrum(file_data_dict)
        objects.append(s)
        print s
        if hyperspy.defaults_parser.preferences.General.plot_on_load is True \
            and is_agg is False:
            s.plot()
    if len(objects) == 1:
        objects = objects[0]
    return objects
Example #19
    def plot_explained_variance_ratio(self, n=50, log=True, ax=None, label=None):
        """Plot the decomposition explained variance ratio vs index number

        Parameters
        ----------
        n : int
            Number of components
        log : bool
            If True, the y axis uses a log scale
        ax : matplotlib.axes instance
            The axes where to plot the figures. If None, a new figure will be
            created
        label: str
            An optional label for the legend
            
        Returns
        -------
        The axes of the plot, which can be passed to the method again in
        a future call using the ax argument
        
        """
        target = self.learning_results
        if target.explained_variance_ratio is None:
            messages.information("No explained variance ratio information available")
            return 0
        if n > target.explained_variance_ratio.shape[0]:
            n = target.explained_variance_ratio.shape[0]
        if ax is None:
            fig = plt.figure()
            ax = fig.add_subplot(111)
        ax.plot(range(n), target.explained_variance_ratio[:n], "o", label=label)
        if log is True:
            ax.semilogy()
        ax.set_ylabel("Explained variance ratio")
        ax.set_xlabel("Principal component index")
        plt.legend()
        plt.show()
        return ax
Example #20
 def multifit(self,
              mask=None,
              fitter="leastsq",
              charge_only_fixed=False,
              grad=False,
              autosave=False,
              autosave_every=10,
              **kwargs):
     if autosave is not False:
         fd, autosave_fn = tempfile.mkstemp(prefix='hyperspy_autosave-',
                                            dir='.',
                                            suffix='.npz')
         os.close(fd)
         autosave_fn = autosave_fn[:-4]
         messages.information("Autosaving each %s pixels to %s.npz" %
                              (autosave_every, autosave_fn))
         messages.information(
             "When multifit finishes its job the file will be deleted")
     if mask is not None and \
     (mask.shape != tuple(self.axes_manager.navigation_shape)):
         messages.warning_exit(
             "The mask must be an array with the same espatial dimensions as the"
             "navigation shape, %s" % self.axes_manager.navigation_shape)
     masked_elements = 0 if mask is None else mask.sum()
     pbar = progressbar.progressbar(
         maxval=(np.cumprod(self.axes_manager.navigation_shape)[-1] -
                 masked_elements))
     i = 0
     for index in np.ndindex(tuple(self.axes_manager.navigation_shape)):
         if mask is None or not mask[index]:
             self.axes_manager.set_not_slicing_indexes(index)
             self.charge(only_fixed=charge_only_fixed)
             self.fit(fitter=fitter, grad=grad, **kwargs)
             i += 1
             pbar.update(i)
         if autosave is True and i % autosave_every == 0:
             self.save_parameters2file(autosave_fn)
     pbar.finish()
     if autosave is True:
         messages.information('Deleting the temporary file %s' %
                              (autosave_fn + '.npz'))
         os.remove(autosave_fn + '.npz')
Example #21
 def multifit(self, mask = None, fitter = "leastsq", 
              charge_only_fixed = False, grad = False, autosave = False, 
              autosave_every = 10, **kwargs):
     if autosave is not False:
         fd, autosave_fn = tempfile.mkstemp(prefix = 'hyperspy_autosave-', 
         dir = '.', suffix = '.npz')
         os.close(fd)
         autosave_fn = autosave_fn[:-4]
         messages.information(
         "Autosaving each %s pixels to %s.npz" % (autosave_every, 
                                                  autosave_fn))
         messages.information(
         "When multifit finishes its job the file will be deleted")
     if mask is not None and \
     (mask.shape != tuple(self.axes_manager.navigation_shape)):
        messages.warning_exit(
        "The mask must be an array with the same espatial dimensions as the" 
        "navigation shape, %s" % self.axes_manager.navigation_shape)
     masked_elements = 0 if mask is None else mask.sum()
     pbar = progressbar.progressbar(
     maxval = (np.cumprod(self.axes_manager.navigation_shape)[-1] - 
     masked_elements))
     i = 0
     for index in np.ndindex(tuple(self.axes_manager.navigation_shape)):
         if mask is None or not mask[index]:
             self.axes_manager.set_not_slicing_indexes(index)
             self.charge(only_fixed = charge_only_fixed)
             self.fit(fitter = fitter, grad = grad, **kwargs)
             i += 1
             pbar.update(i)
         if autosave is True and i % autosave_every  == 0:
             self.save_parameters2file(autosave_fn)
     pbar.finish()
     if autosave is True:
         messages.information(
         'Deleting the temporary file %s' % (autosave_fn + '.npz'))
         os.remove(autosave_fn + '.npz')
Example #22
    def normalize_poissonian_noise(self, navigation_mask = None,
                                   signal_mask = None, return_masks = False):
        """
        Scales the SI following Surf. Interface Anal. 2004; 36: 203–212 to
        "normalize" the poissonian data for decomposition analysis

        Parameters
        ----------
        navigation_mask : boolean numpy array
        signal_mask : boolean numpy array
        """
        messages.information(
            "Scaling the data to normalize the (presumably) Poissonian noise")
        refold = self.unfold_if_multidim()
        dc = self.data
        if navigation_mask is None:
            navigation_mask = slice(None)
        else:
            navigation_mask = navigation_mask.ravel()
        if signal_mask is None:
            signal_mask = slice(None)
        # Rescale the data to gaussianize the poissonian noise
        aG = dc[:,signal_mask][navigation_mask,:].sum(1).squeeze()
        bH = dc[:,signal_mask][navigation_mask,:].sum(0).squeeze()
        # Checks if any is negative
        if (aG < 0).any() or (bH < 0).any():
            messages.warning_exit(
            "Data error: negative values\n"
            "Are you sure that the data follow a poissonian distribution?")
        # Update the spatial and energy masks so they do not include
        # rows or columns that sum to zero.
        aG0 = (aG == 0)
        bH0 = (bH == 0)
        if aG0.any():
            if isinstance(navigation_mask, slice):
                # Convert the slice into a mask before setting its values
                navigation_mask = np.ones((self.data.shape[0]),dtype = 'bool')
            # Set columns summing zero as masked
            navigation_mask[aG0] = False
            aG = aG[~aG0]
        if bH0.any():
            if isinstance(signal_mask, slice):
                # Convert the slice into a mask before setting its values
                signal_mask = np.ones((self.data.shape[1]), dtype = 'bool')
            # Set rows summing zero as masked
            signal_mask[bH0] = False
            bH = bH[~bH0]
        self._root_aG = np.sqrt(aG)[:, np.newaxis]
        self._root_bH = np.sqrt(bH)[np.newaxis, :]
        dc[:,signal_mask][navigation_mask,:] = \
            (dc[:,signal_mask][navigation_mask,:] /
                (self._root_aG * self._root_bH))
        if refold is True:
            print "Automatically refolding the SI after scaling"
            self.fold()
        if return_masks is True:
            if isinstance(navigation_mask, slice):
                navigation_mask = None
            if isinstance(signal_mask, slice):
                signal_mask = None
            return navigation_mask, signal_mask
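
The return_masks variant above matters because the method can shrink the masks
when rows or columns sum to zero, so the caller needs the updated masks back.
A hedged call sketch (`s`, `nav_mask` and `sig_mask` are hypothetical):

nav_mask, sig_mask = s.normalize_poissonian_noise(navigation_mask=nav_mask,
                                                  signal_mask=sig_mask,
                                                  return_masks=True)
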
Example #23
            program_files = os.environ['PROGRAMFILES(X86)']
            gos_path = os.path.join(program_files, gos)
            if os.path.isdir(gos_path) is False:
                gos_path = os.path.join(config_path, 'EELS_GOS')
    else:
        gos_path = os.path.join(config_path, 'EELS_GOS')
    return gos_path


if os.path.isfile(defaults_file):
    # Remove the config file if it is obsolete
    f = open(defaults_file)
    if 'Not really' in f.readline():
        # It is the old config file
        f.close()
        messages.information('Removing the obsolete config file')
        os.remove(defaults_file)
        defaults_file_exists = False
    else:
        defaults_file_exists = True
else:
    defaults_file_exists = False

# Defaults template definition starts#####################################
# This "section" is all that has to be modified to add or remove sections and
# options from the defaults


class GeneralConfig(t.HasTraits):
    default_file_format = t.Enum('hdf5', 'rpl',
                                 desc='Using the hdf5 format is highly recommended because it is the '
Example #24
    def principal_components_analysis(self,
                                      normalize_poissonian_noise=False,
                                      algorithm='svd',
                                      output_dimension=None,
                                      navigation_mask=None,
                                      signal_mask=None,
                                      center=False,
                                      variance2one=False,
                                      var_array=None,
                                      var_func=None,
                                      polyfit=None,
                                      on_peaks=False):
        """Principal components analysis.

        The results are stored in self.mva_results

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise
        algorithm : {'svd', 'fast_svd', 'mlpca', 'fast_mlpca', 'mdp', 'NIPALS'}
        output_dimension : None or int
            number of PCA to keep
        navigation_mask : boolean numpy array
        signal_mask : boolean numpy array
        center : bool
            Perform energy centering before PCA
        variance2one : bool
            Perform whitening before PCA
        var_array : numpy array
            Array of variance for the maximum likelihood PCA algorithm
        var_func : function or numpy array
            If a function, it is applied to the dataset to obtain the
            var_array. Alternatively, it can be an array with the
            coefficients of a polynomial.
        polyfit :


        See also
        --------
        plot_principal_components, plot_principal_components_maps, plot_lev

        """
        # backup the original data
        if on_peaks:
            self._data_before_treatments = self.peak_chars.copy()
        else:
            self._data_before_treatments = self.data.copy()
        # Check for conflicting options and correct them when possible
        if (algorithm == 'mdp' or algorithm == 'NIPALS') and center is False:
            print \
            """
            The PCA algorithms from the MDP toolkit (mdp and NIPALS)
            do not permit deactivating data centering.
            Therefore, the algorithm will proceed to center the data.
            """
            center = True
        if algorithm == 'mlpca':
            if normalize_poissonian_noise is True:
                messages.warning(
                    "It makes no sense to do normalize_poissonian_noise with "
                    "the MLPCA algorithm. Therefore, "
                    "normalize_poissonian_noise is set to False")
                normalize_poissonian_noise = False
            if output_dimension is None:
                messages.warning_exit(
                    "With the mlpca algorithm the output_dimension must be expecified"
                )

        if center is True and normalize_poissonian_noise is True:
            messages.warning(
                "Centering is not compatible with poissonian noise normalization\n"
                "Disabling centering")
            center = False

        if variance2one is True and normalize_poissonian_noise is True:
            messages.warning(
                "Variance normalization is not compatible with poissonian noise"
                "normalization.\n"
                "Disabling variance2one")
            variance2one = False

        # Apply pre-treatments
        # Centering
        if center is True:
            self.energy_center()
        # Variance normalization
        if variance2one is True:
            self.variance2one()
        # Transform the data in a line spectrum
        self._unfolded4pca = self.unfold_if_multidim()
        # Normalize the poissonian noise
        # Note that this function can change the masks
        if normalize_poissonian_noise is True:
            navigation_mask, signal_mask = \
                self.normalize_poissonian_noise(navigation_mask = navigation_mask,
                                                signal_mask = signal_mask,
                                                return_masks = True)

        navigation_mask = self._correct_navigation_mask_when_unfolded(
            navigation_mask)

        messages.information('Performing principal components analysis')

        if on_peaks:
            dc = self.peak_chars
        else:
            # The data must be transposed both for Images and Spectra
            dc = self.data.T.squeeze()
        #set the output target (peak results or not?)
        target = self._get_target(on_peaks)
        # Transform the None masks in slices to get the right behaviour
        if navigation_mask is None:
            navigation_mask = slice(None)
        if signal_mask is None:
            signal_mask = slice(None)
        if algorithm == 'mdp' or algorithm == 'NIPALS':
            if algorithm == 'mdp':
                target.pca_node = mdp.nodes.PCANode(
                    output_dim=output_dimension, svd=True)
            elif algorithm == 'NIPALS':
                target.pca_node = mdp.nodes.NIPALSNode(
                    output_dim=output_dimension)
            # Train the node
            print "\nPerforming the PCA node training"
            print "This include variance normalizing"
            target.pca_node.train(dc[signal_mask, :][:, navigation_mask])
            print "Performing PCA projection"
            pc = target.pca_node.execute(dc[:, navigation_mask])
            pca_v = target.pca_node.v
            pca_V = target.pca_node.d
            target.output_dimension = output_dimension

        elif algorithm == 'svd':
            pca_v, pca_V = pca(dc[signal_mask, :][:, navigation_mask])
            pc = np.dot(dc[:, navigation_mask], pca_v)
        elif algorithm == 'fast_svd':
            pca_v, pca_V = pca(dc[signal_mask, :][:, navigation_mask],
                               fast=True,
                               output_dimension=output_dimension)
            pc = np.dot(dc[:, navigation_mask], pca_v)

        elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
            print "Performing the MLPCA training"
            if output_dimension is None:
                messages.warning_exit(
                    "For MLPCA it is mandatory to define the output_dimension")
            if var_array is None and var_func is None:
                messages.information('No variance array provided. '
                                     'Assuming poissonian data')
                var_array = dc.squeeze()[signal_mask, :][:, navigation_mask]

            if var_array is not None and var_func is not None:
                messages.warning_exit(
                    "You have defined both the var_func and var_array keywords"
                    "Please, define just one of them")
            if var_func is not None:
                if hasattr(var_func, '__call__'):
                    var_array = var_func(dc[signal_mask, ...][:,
                                                              navigation_mask])
                else:
                    try:
                        var_array = np.polyval(
                            polyfit, dc[signal_mask, navigation_mask])
                    except Exception:
                        messages.warning_exit(
                            'var_func must be either a function or an array '
                            'defining the coefficients of a polynomial')
            if algorithm == 'mlpca':
                fast = False
            else:
                fast = True
            target.mlpca_output = mlpca(
                dc.squeeze()[signal_mask, :][:, navigation_mask],
                var_array.squeeze(),
                output_dimension,
                fast=fast)
            U, S, V, Sobj, ErrFlag = target.mlpca_output
            print "Performing PCA projection"
            pc = np.dot(dc[:, navigation_mask], V)
            pca_v = V
            pca_V = S**2

        if output_dimension:
            print "trimming to %i dimensions" % output_dimension
            pca_v = pca_v[:, :output_dimension]
            pca_V = pca_V[:output_dimension]
            pc = pc[:, :output_dimension]

        target.pc = pc
        target.v = pca_v
        target.V = pca_V
        target.pca_algorithm = algorithm
        target.centered = center
        target.poissonian_noise_normalized = \
            normalize_poissonian_noise
        target.output_dimension = output_dimension
        target.unfolded = self._unfolded4pca
        target.variance2one = variance2one

        if self._unfolded4pca is True:
            target.original_shape = self._shape_before_unfolding

        # Rescale the results if the noise was normalized
        if normalize_poissonian_noise is True:
            target.pc[signal_mask, :] *= self._root_bH
            target.v *= self._root_aG.T
            if isinstance(navigation_mask, slice):
                navigation_mask = None
            if isinstance(signal_mask, slice):
                signal_mask = None

        #undo any pre-treatments
        self.undo_treatments(on_peaks)

        # Set the pixels that were not processed to nan
        if (navigation_mask is not None and
                not isinstance(navigation_mask, slice)):
            v = np.zeros((dc.shape[1], target.v.shape[1]),
                         dtype=target.v.dtype)
            v[~navigation_mask, :] = np.nan
            v[navigation_mask, :] = target.v
            target.v = v

        if self._unfolded4pca is True:
            self.fold()
            self._unfolded4pca = False
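
A usage sketch for the method above, where `s` is a hypothetical signal
exposing it. Note that the navigation_mask convention (whether True marks
pixels to keep or to drop) differs between the revisions shown in this
section, so check the one you actually run:

import numpy as np

nav_mask = np.zeros(tuple(s.axes_manager.navigation_shape), dtype=bool)
nav_mask[0, :] = True  # flag the first row of pixels, for example
s.principal_components_analysis(normalize_poissonian_noise=True,
                                algorithm='svd',
                                output_dimension=10,
                                navigation_mask=nav_mask)
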
Example #25
    def principal_components_analysis(self, normalize_poissonian_noise = False,
    algorithm = 'svd', output_dimension = None, navigation_mask = None,
    signal_mask = None, center = False, variance2one = False, var_array = None,
    var_func = None, polyfit = None, on_peaks=False):
        """Principal components analysis.

        The results are stored in self.mva_results

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise
        algorithm : {'svd', 'fast_svd', 'mlpca', 'fast_mlpca', 'mdp', 'NIPALS'}
        output_dimension : None or int
            number of PCA to keep
        navigation_mask : boolean numpy array
        signal_mask : boolean numpy array
        center : bool
            Perform energy centering before PCA
        variance2one : bool
            Perform whitening before PCA
        var_array : numpy array
            Array of variance for the maximum likelihood PCA algorithm
        var_func : function or numpy array
            If a function, it is applied to the dataset to obtain the
            var_array. Alternatively, it can be an array with the
            coefficients of a polynomial.
        polyfit :


        See also
        --------
        plot_principal_components, plot_principal_components_maps, plot_lev

        """
        # backup the original data
        if on_peaks:
            self._data_before_treatments = self.peak_chars.copy()
        else:
            self._data_before_treatments = self.data.copy()
        # Check for conflicting options and correct them when possible
        if (algorithm == 'mdp' or algorithm == 'NIPALS') and center is False:
            print \
            """
            The PCA algorithms from the MDP toolkit (mdp and NIPALS)
            do not permit deactivating data centering.
            Therefore, the algorithm will proceed to center the data.
            """
            center = True
        if algorithm == 'mlpca':
            if normalize_poissonian_noise is True:
                messages.warning(
                "It makes no sense to do normalize_poissonian_noise with "
                "the MLPCA algorithm. Therefore, "
                "normalize_poissonian_noise is set to False")
                normalize_poissonian_noise = False
            if output_dimension is None:
                messages.warning_exit(
                "With the mlpca algorithm the output_dimension must be expecified")

        if center is True and normalize_poissonian_noise is True:
            messages.warning(
            "Centering is not compatible with poissonian noise normalization\n"
            "Disabling centering")
            center = False

        if variance2one is True and normalize_poissonian_noise is True:
            messages.warning(
            "Variance normalization is not compatible with poissonian noise"
            "normalization.\n"
            "Disabling variance2one")
            variance2one = False

        # Apply pre-treatments
        # Centering
        if center is True:
            self.energy_center()
        # Variance normalization
        if variance2one is True:
            self.variance2one()
        # Transform the data in a line spectrum
        self._unfolded4pca = self.unfold_if_multidim()
        # Normalize the poissonian noise
        # Note that this function can change the masks
        if normalize_poissonian_noise is True:
            navigation_mask, signal_mask = \
                self.normalize_poissonian_noise(navigation_mask = navigation_mask,
                                                signal_mask = signal_mask,
                                                return_masks = True)

        navigation_mask = self._correct_navigation_mask_when_unfolded(navigation_mask)

        messages.information('Performing principal components analysis')

        if on_peaks:
            dc=self.peak_chars
        else:
            # The data must be transposed both for Images and Spectra
            dc = self.data.T.squeeze()
        #set the output target (peak results or not?)
        target=self._get_target(on_peaks)
        # Transform the None masks in slices to get the right behaviour
        if navigation_mask is None:
            navigation_mask = slice(None)
        if signal_mask is None:
            signal_mask = slice(None)
        if algorithm == 'mdp' or algorithm == 'NIPALS':
            if algorithm == 'mdp':
                target.pca_node = mdp.nodes.PCANode(
                output_dim=output_dimension, svd = True)
            elif algorithm == 'NIPALS':
                target.pca_node = mdp.nodes.NIPALSNode(
                output_dim=output_dimension)
            # Train the node
            print "\nPerforming the PCA node training"
            print "This include variance normalizing"
            target.pca_node.train(
                dc[signal_mask,:][:,navigation_mask])
            print "Performing PCA projection"
            pc = target.pca_node.execute(dc[:,navigation_mask])
            pca_v = target.pca_node.v
            pca_V = target.pca_node.d
            target.output_dimension = output_dimension

        elif algorithm == 'svd':
            pca_v, pca_V = pca(dc[signal_mask,:][:,navigation_mask])
            pc = np.dot(dc[:,navigation_mask], pca_v)
        elif algorithm == 'fast_svd':
            pca_v, pca_V = pca(dc[signal_mask,:][:,navigation_mask],
            fast = True, output_dimension = output_dimension)
            pc = np.dot(dc[:,navigation_mask], pca_v)

        elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
            print "Performing the MLPCA training"
            if output_dimension is None:
                messages.warning_exit(
                "For MLPCA it is mandatory to define the output_dimension")
            if var_array is None and var_func is None:
                messages.information('No variance array provided. '
                                     'Assuming poissonian data')
                var_array = dc.squeeze()[signal_mask,:][:,navigation_mask]

            if var_array is not None and var_func is not None:
                messages.warning_exit(
                "You have defined both the var_func and var_array keywords"
                "Please, define just one of them")
            if var_func is not None:
                if hasattr(var_func, '__call__'):
                    var_array = var_func(dc[signal_mask,...][:,navigation_mask])
                else:
                    try:
                        var_array = np.polyval(polyfit,dc[signal_mask,
                        navigation_mask])
                    except Exception:
                        messages.warning_exit(
                        'var_func must be either a function or an array '
                        'defining the coefficients of a polynomial')
            if algorithm == 'mlpca':
                fast = False
            else:
                fast = True
            target.mlpca_output = mlpca(
                dc.squeeze()[signal_mask,:][:,navigation_mask],
                var_array.squeeze(), output_dimension, fast = fast)
            U,S,V,Sobj, ErrFlag  = target.mlpca_output
            print "Performing PCA projection"
            pc = np.dot(dc[:,navigation_mask], V)
            pca_v = V
            pca_V = S ** 2

        if output_dimension:
            print "Trimming to %i dimensions" % output_dimension
            pca_v = pca_v[:, :output_dimension]
            pca_V = pca_V[:output_dimension]
            pc = pc[:, :output_dimension]

        target.pc = pc
        target.v = pca_v
        target.V = pca_V
        target.pca_algorithm = algorithm
        target.centered = center
        target.poissonian_noise_normalized = \
            normalize_poissonian_noise
        target.output_dimension = output_dimension
        target.unfolded = self._unfolded4pca
        target.variance2one = variance2one

        if self._unfolded4pca is True:
            target.original_shape = self._shape_before_unfolding

        # Rescale the results if the noise was normalized
        if normalize_poissonian_noise is True:
            target.pc[signal_mask, :] *= self._root_bH
            target.v *= self._root_aG.T
            if isinstance(navigation_mask, slice):
                navigation_mask = None
            if isinstance(signal_mask, slice):
                signal_mask = None

        # Undo any pre-treatments
        self.undo_treatments(on_peaks)

        # Set the pixels that were not processed to nan
        if navigation_mask is not None and not isinstance(navigation_mask,
                                                          slice):
            v = np.zeros((dc.shape[1], target.v.shape[1]),
                         dtype=target.v.dtype)
            v[~navigation_mask, :] = np.nan
            v[navigation_mask, :] = target.v
            target.v = v

        if self._unfolded4pca is True:
            self.fold()
            self._unfolded4pca = False
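
The block just above rebuilds a full-size loadings array and marks the
navigation positions that were masked out as NaN. A minimal, self-contained
sketch of that pattern (array names and sizes are hypothetical):

import numpy as np

# 6 navigation positions, 3 components; 4 positions were processed.
loadings_masked = np.arange(4 * 3, dtype=float).reshape(4, 3)
navigation_mask = np.array([True, False, True, True, False, True])

# Expand the masked result back to full size; skipped pixels become nan.
full = np.zeros((navigation_mask.size, loadings_masked.shape[1]),
                dtype=loadings_masked.dtype)
full[~navigation_mask, :] = np.nan
full[navigation_mask, :] = loadings_masked
print(full)  # rows 1 and 4 are nan, the rest hold the computed loadings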
Example #26
0
    def normalize_poissonian_noise(self,
                                   navigation_mask=None,
                                   signal_mask=None,
                                   return_masks=False):
        """
        Scales the SI following Surf. Interface Anal. 2004; 36: 203–212 to
        "normalize" the poissonian data for PCA analysis

        Parameters
        ----------
        navigation_mask : boolen numpy array
        signal_mask  : boolen numpy array
        """
        messages.information(
            "Scaling the data to normalize the (presumably) Poissonian noise")
        # If energy axis is not first, it needs to be for MVA.
        refold = self.unfold_if_multidim()
        dc = self.data.T.squeeze().copy()
        navigation_mask = \
            self._correct_navigation_mask_when_unfolded(navigation_mask)
        if navigation_mask is None:
            navigation_mask = slice(None)
        if signal_mask is None:
            signal_mask = slice(None)
        # Rescale the data to Gaussianize the Poissonian noise
        aG = dc[signal_mask, :][:, navigation_mask].sum(0).squeeze()
        bH = dc[signal_mask, :][:, navigation_mask].sum(1).squeeze()
        # Checks if any is negative
        if (aG < 0).any() or (bH < 0).any():
            messages.warning_exit(
                "Data error: negative values\n"
                "Are you sure that the data follow a poissonian distribution?")
        # Update the spatial and energy masks so they do not include rows
        # or columns that sum to zero.
        aG0 = (aG == 0)
        bH0 = (bH == 0)
        if aG0.any():
            if isinstance(navigation_mask, slice):
                # Convert the slice into a mask before setting its values
                navigation_mask = np.ones((self.data.shape[1]), dtype='bool')
            # Set columns summing zero as masked
            navigation_mask[aG0] = False
            aG = aG[~aG0]
        if bH0.any():
            if isinstance(signal_mask, slice):
                # Convert the slice into a mask before setting its values
                signal_mask = np.ones((self.data.shape[0]), dtype='bool')
            # Set rows summing zero as masked
            signal_mask[bH0] = False
            bH = bH[~bH0]
        self._root_aG = np.sqrt(aG)[np.newaxis, :]
        self._root_bH = np.sqrt(bH)[:, np.newaxis]
        temp = (dc[signal_mask, :][:, navigation_mask] /
                (self._root_aG * self._root_bH))
        if isinstance(signal_mask, slice) or isinstance(
                navigation_mask, slice):
            dc[signal_mask, navigation_mask] = temp
        else:
            mask3D = signal_mask[:, np.newaxis] * \
                navigation_mask[np.newaxis, :]
            dc[mask3D] = temp.ravel()
        # TODO: dc is a copy, so modifying it never touched self.data.
        # Was the normalization ever really getting applied? Write it back
        # explicitly here; comment out the next line if this is not wanted.
        self.data = dc.T.copy()
        # End of the normalization write-back to self.data.
        if refold is True:
            print "Automatically refolding the SI after scaling"
            self.fold()
        if return_masks is True:
            if isinstance(navigation_mask, slice):
                navigation_mask = None
            if isinstance(signal_mask, slice):
                signal_mask = None
            return navigation_mask, signal_mask
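
The scaling above divides each element by sqrt(column sum) * sqrt(row sum),
which approximately equalizes the variance of the Poissonian noise before
PCA. A minimal NumPy sketch of the same arithmetic on a toy counts array
(shapes and values are hypothetical, not the HyperSpy API):

import numpy as np

np.random.seed(0)
# Toy counts: 4 energy channels x 5 navigation pixels.
dc = np.random.poisson(lam=50, size=(4, 5)).astype(float)
aG = dc.sum(0)  # per-pixel totals (sum over channels)
bH = dc.sum(1)  # per-channel totals (sum over pixels)
root_aG = np.sqrt(aG)[np.newaxis, :]
root_bH = np.sqrt(bH)[:, np.newaxis]
# After this rescaling the noise variance is roughly uniform, which is
# what a plain SVD/PCA assumes.
dc_normalized = dc / (root_aG * root_bH)
print(dc_normalized.round(3))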
Example #27
0
File: io.py Project: kif/hyperspy
def load(filenames=None,
         record_by=None,
         signal_type=None,
         signal_origin=None,
         stack=False,
         stack_axis=None,
         new_axis_name="stack_element",
         mmap=False,
         mmap_dir=None,
         **kwds):
    """
    Load potentially multiple supported file into an hyperspy structure
    Supported formats: HDF5, msa, Gatan dm3, Ripple (rpl+raw)
    FEI ser and emi and hdf5, tif and a number of image formats.
    
    Any extra keyword is passed to the corresponsing reader. For 
    available options see their individual documentation.
    
    Parameters
    ----------
    filenames :  None, str or list of strings
        The filename to be loaded. If None, a window will open to select
        a file to load. If a valid filename is passed in that single
        file is loaded. If multiple file names are passed in
        a list, a list of objects or a single object containing the data
        of the individual files stacked are returned. This behaviour is
        controlled by the `stack` parameter (see bellow). Multiple
        files can be loaded by using simple shell-style wildcards, 
        e.g. 'my_file*.msa' loads all the files that starts
        by 'my_file' and has the '.msa' extension.
    record_by : {None, 'spectrum', 'image', ""}
        The value provided may determine the Signal subclass assigned to the 
        data.
        If None, the value is read or guessed from the file. Any other value
        overrides the value stored in the file if any.
        If "spectrum" load the data in a Spectrum (sub)class.
        If "image" load the data in an Image (sub)class.
        If "" (empty string) load the data in a Signal class.
        
    signal_type : {None, "EELS", "EDS_TEM", "EDS_SEM", "", str}
        The acronym that identifies the signal type.
        The value provided may determine the Signal subclass assigned to the 
        data.
        If None the value is read/guessed from the file. Any other value
        overrides the value stored in the file if any.
        For electron energy-loss spectroscopy use "EELS".
        For energy dispersive x-rays use "EDS_TEM"
        if acquired from an electron-transparent sample, as is usually
        the case in a transmission electron microscope (TEM), or
        "EDS_SEM" if acquired from a non electron-transparent sample,
        as is usually the case in a scanning electron microscope (SEM).
        If "" (empty string) the value is not read from the file and is 
        considered undefined. 
    signal_origin : {None, "experiment", "simulation", ""}
        Defines the origin of the signal.
        The value provided may determine the Signal subclass assigned to the 
        data.
        If None the value is read/guessed from the file. Any other value
        overrides the value stored in the file if any.
        Use "experiment" if loading experimental data.
        Use "simulation" if loading simulated data.
        If "" (empty string) the value is not read from the file and is 
        considered undefined. 
    stack : bool
        If True and multiple filenames are passed in, stacking all
        the data into a single object is attempted. All files must match
        in shape. It is possible to store the data in a memory mapped
        temporary file instead of in memory by setting mmap_mode. The title
        is set to the name of the folder containing the files.
    stack_axis : {None, int, str}
        If None, the signals are stacked over a new axis. The data must 
        have the same dimensions. Otherwise the 
        signals are stacked over the axis given by its integer index or
        its name. The data must have the same shape, except in the dimension
        corresponding to `axis`.
    new_axis_name : string
        The name of the new axis when `axis` is None.
        If an axis with this name already exists, '-i' is appended
        automatically, where `i` is an integer, until a name that is not
        yet in use is found.
        
    mmap: bool
        If True and stack is True, then the data is stored
        in a memory-mapped temporary file. The memory-mapped data is
        stored on disk, and not directly loaded into memory.  
        Memory mapping is especially useful for accessing small 
        fragments of large files without reading the entire file into 
        memory.
    mmap_dir : string
        If mmap_dir is not None, and stack and mmap are True, the memory
        mapped file will be created in the given directory,
        otherwise the default directory is used.
        
    Returns
    -------
    Signal instance or list of signal instances

    Examples
    --------
    Loading a single file providing the signal type:
    
    >>> d = load('file.dm3', signal_type='EDS_TEM')
    
    Loading a single file and overriding its default record_by:
    
    >>> d = load('file.dm3', record_by='Image')
    
    Loading multiple files:
    
    >>> d = load('file1.dm3','file2.dm3')
    
    Loading multiple files matching the pattern:
    
    >>> d = load('file*.dm3')

    """
    kwds['record_by'] = record_by
    kwds['signal_type'] = signal_type
    kwds['signal_origin'] = signal_origin
    if filenames is None:
        if hyperspy.defaults_parser.preferences.General.interactive is True:
            from hyperspy.gui.tools import Load
            load_ui = Load()
            load_ui.edit_traits()
            if load_ui.filename:
                filenames = load_ui.filename
        else:
            raise ValueError("No file provided to reader and "
                             "interactive mode is disabled")
        if filenames is None:
            raise ValueError("No file provided to reader")

    if isinstance(filenames, basestring):
        filenames = natsorted(
            [f for f in glob.glob(filenames) if os.path.isfile(f)])
        if not filenames:
            raise ValueError('No file name matches this pattern')
    elif not isinstance(filenames, (list, tuple)):
        raise ValueError(
            'The filenames parameter must be a list, tuple, string or None')
    if not filenames:
        raise ValueError('No file provided to reader.')
    else:
        if len(filenames) > 1:
            messages.information('Loading individual files')
        if stack is True:
            signal = []
            for i, filename in enumerate(filenames):
                obj = load_single_file(filename, **kwds)
                signal.append(obj)
            signal = hyperspy.utils.stack(signal,
                                          axis=stack_axis,
                                          new_axis_name=new_axis_name,
                                          mmap=mmap,
                                          mmap_dir=mmap_dir)
            signal.mapped_parameters.title = \
                os.path.split(os.path.split(
                    os.path.abspath(filenames[0]))[0])[1]
            messages.information('Individual files loaded correctly')
            signal._print_summary()
            objects = [
                signal,
            ]
        else:
            objects = [
                load_single_file(filename, **kwds) for filename in filenames
            ]

        if hyperspy.defaults_parser.preferences.General.plot_on_load:
            for obj in objects:
                obj.plot()
        if len(objects) == 1:
            objects = objects[0]
    return objects
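
A hedged usage sketch of the stacking path above (the file pattern and
directory are hypothetical):

# Stack every matching file along a new 'stack_element' axis, keeping
# the combined data in a memory-mapped temporary file on disk.
s = load('map_*.dm3', stack=True, mmap=True, mmap_dir='/tmp')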
Example #28
0
    def decomposition(self,
                      normalize_poissonian_noise=False,
                      algorithm='svd',
                      output_dimension=None,
                      centre=None,
                      auto_transpose=True,
                      navigation_mask=None,
                      signal_mask=None,
                      var_array=None,
                      var_func=None,
                      polyfit=None,
                      reproject=None,
                      **kwargs):
        """Decomposition with a choice of algorithms

        The results are stored in self.learning_results

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise

        algorithm : 'svd' | 'fast_svd' | 'mlpca' | 'fast_mlpca' | 'nmf' |
            'sparse_pca' | 'mini_batch_sparse_pca'

        output_dimension : None or int
            number of components to keep/calculate

        centre : None | 'variables' | 'trials'
            If None, no centring is applied. If 'variables', the centring
            will be performed in the variable axis. If 'trials', the
            centring will be performed in the 'trials' axis. It only has
            effect when using the svd or fast_svd algorithms.

        auto_transpose : bool
            If True, automatically transposes the data to boost performance.
            Only has effect when using the svd or fast_svd algorithms.

        navigation_mask : boolean numpy array
            The navigation locations marked as True are not used in the
            decomposition.

        signal_mask : boolean numpy array
            The signal locations marked as True are not used in the
            decomposition.

        var_array : numpy array
            Array of variance for the maximum likelihood PCA algorithm

        var_func : function or numpy array
            If a function, it will be applied to the dataset to obtain the
            var_array. Alternatively, it can be an array with the
            coefficients of a polynomial.

        polyfit : numpy array
            Coefficients of a polynomial, passed to np.polyval to build
            var_array when var_func is given but not callable.

        reproject : None | 'signal' | 'navigation' | 'both'
            If not None, the results of the decomposition will be projected
            onto the selected masked area.


        See also
        --------
        plot_decomposition_factors, plot_decomposition_loadings, plot_lev

        """
        # Check if it is the wrong data type
        if self.data.dtype.char not in ['e', 'f', 'd']:  # If not float
            messages.warning(
                'To perform a decomposition the data must be of the float '
                'type. You can change the type using the change_dtype method'
                ' e.g. s.change_dtype(\'float64\')\n'
                'Nothing done.')
            return

        if self.axes_manager.navigation_size < 2:
            raise AttributeError("It is not possible to decompose a dataset "
                                 "with navigation_size < 2")
        # backup the original data
        self._data_before_treatments = self.data.copy()

        if algorithm == 'mlpca':
            if normalize_poissonian_noise is True:
                messages.warning(
                    "It makes no sense to do normalize_poissonian_noise with "
                    "the MLPCA algorithm. Therefore, "
                    "normalize_poissonian_noise is set to False")
                normalize_poissonian_noise = False
            if output_dimension is None:
                raise ValueError("With the mlpca algorithm the "
                                 "output_dimension must be expecified")

        # Apply pre-treatments
        # Transform the data in a line spectrum
        self._unfolded4decomposition = self.unfold()
        try:
            if hasattr(navigation_mask, 'ravel'):
                navigation_mask = navigation_mask.ravel()

            if hasattr(signal_mask, 'ravel'):
                signal_mask = signal_mask.ravel()

            # Normalize the poissonian noise
            # TODO this function can change the masks and this can cause
            # problems when reprojecting
            if normalize_poissonian_noise is True:
                self.normalize_poissonian_noise(
                    navigation_mask=navigation_mask,
                    signal_mask=signal_mask,)
            messages.information('Performing decomposition analysis')
            # The rest of the code assumes that the first data axis
            # is the navigation axis. We transpose the data if that is not the
            # case.
            dc = (self.data if self.axes_manager[0].index_in_array == 0
                  else self.data.T)
            # set the output target (peak results or not?)
            target = self.learning_results

            # Transform the None masks in slices to get the right behaviour
            if navigation_mask is None:
                navigation_mask = slice(None)
            else:
                navigation_mask = ~navigation_mask
            if signal_mask is None:
                signal_mask = slice(None)
            else:
                signal_mask = ~signal_mask

            # WARNING: signal_mask and navigation_mask values are now their
            # negations, i.e. True -> False and vice versa. However, the
            # stored value (at the end of the method) coincides with the
            # input masks

            # Reset the explained_variance which is not set by all the
            # algorithms
            explained_variance = None
            explained_variance_ratio = None
            mean = None

            if algorithm == 'svd':
                factors, loadings, explained_variance, mean = svd_pca(
                    dc[:, signal_mask][navigation_mask, :], centre=centre,
                    auto_transpose=auto_transpose)

            elif algorithm == 'fast_svd':
                factors, loadings, explained_variance, mean = svd_pca(
                    dc[:, signal_mask][navigation_mask, :],
                    fast=True,
                    output_dimension=output_dimension,
                    centre=centre,
                    auto_transpose=auto_transpose)

            elif algorithm == 'sklearn_pca':
                if import_sklearn.sklearn_installed is False:
                    raise ImportError(
                        'sklearn is not installed. Nothing done')
                sk = import_sklearn.sklearn.decomposition.PCA(**kwargs)
                sk.n_components = output_dimension
                loadings = sk.fit_transform((
                    dc[:, signal_mask][navigation_mask, :]))
                factors = sk.components_.T
                explained_variance = sk.explained_variance_
                mean = sk.mean_
                centre = 'trials'

            elif algorithm == 'nmf':
                if import_sklearn.sklearn_installed is False:
                    raise ImportError(
                        'sklearn is not installed. Nothing done')
                sk = import_sklearn.sklearn.decomposition.NMF(**kwargs)
                sk.n_components = output_dimension
                loadings = sk.fit_transform((
                    dc[:, signal_mask][navigation_mask, :]))
                factors = sk.components_.T

            elif algorithm == 'sparse_pca':
                if import_sklearn.sklearn_installed is False:
                    raise ImportError(
                        'sklearn is not installed. Nothing done')
                sk = import_sklearn.sklearn.decomposition.SparsePCA(
                    output_dimension, **kwargs)
                loadings = sk.fit_transform(
                    dc[:, signal_mask][navigation_mask, :])
                factors = sk.components_.T

            elif algorithm == 'mini_batch_sparse_pca':
                if import_sklearn.sklearn_installed is False:
                    raise ImportError(
                        'sklearn is not installed. Nothing done')
                sk = import_sklearn.sklearn.decomposition.MiniBatchSparsePCA(
                    output_dimension, **kwargs)
                loadings = sk.fit_transform(
                    dc[:, signal_mask][navigation_mask, :])
                factors = sk.components_.T

            elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
                print "Performing the MLPCA training"
                if output_dimension is None:
                    raise ValueError(
                        "For MLPCA it is mandatory to define the "
                        "output_dimension")
                if var_array is None and var_func is None:
                    messages.information('No variance array provided. '
                                         'Assuming Poissonian data')
                    var_array = dc[:, signal_mask][navigation_mask, :]

                if var_array is not None and var_func is not None:
                    raise ValueError(
                        "You have defined both the var_func and var_array "
                        "keywords. Please, define just one of them")
                if var_func is not None:
                    if hasattr(var_func, '__call__'):
                        var_array = var_func(
                            dc[signal_mask, ...][:, navigation_mask])
                    else:
                        try:
                            var_array = np.polyval(
                                polyfit, dc[
                                    signal_mask, navigation_mask])
                        except:
                            raise ValueError(
                                'var_func must be either a function or an '
                                'array defining the coefficients of a '
                                'polynomial')
                if algorithm == 'mlpca':
                    fast = False
                else:
                    fast = True
                U, S, V, Sobj, ErrFlag = mlpca(
                    dc[:, signal_mask][navigation_mask, :],
                    var_array, output_dimension, fast=fast)
                loadings = U * S
                factors = V
                explained_variance_ratio = S ** 2 / Sobj
                explained_variance = S ** 2 / len(factors)
            else:
                raise ValueError('Algorithm not recognised. '
                                 'Nothing done')

            # We must calculate the ratio here because otherwise the sum
            # information can be lost if the user calls
            # crop_decomposition_dimension
            if explained_variance is not None and \
                    explained_variance_ratio is None:
                explained_variance_ratio = \
                    explained_variance / explained_variance.sum()

            # Store the results in learning_results
            target.factors = factors
            target.loadings = loadings
            target.explained_variance = explained_variance
            target.explained_variance_ratio = explained_variance_ratio
            target.decomposition_algorithm = algorithm
            target.poissonian_noise_normalized = \
                normalize_poissonian_noise
            target.output_dimension = output_dimension
            target.unfolded = self._unfolded4decomposition
            target.centre = centre
            target.mean = mean

            if output_dimension and factors.shape[1] != output_dimension:
                target.crop_decomposition_dimension(output_dimension)

            # Delete the unmixing information, because it'll refer to a
            # previous decomposition
            target.unmixing_matrix = None
            target.bss_algorithm = None

            if self._unfolded4decomposition is True:
                folding = \
                    self.metadata._HyperSpy.Folding
                target.original_shape = folding.original_shape

            # Reproject
            if mean is None:
                mean = 0
            if reproject in ('navigation', 'both'):
                if algorithm not in ('nmf', 'sparse_pca',
                                     'mini_batch_sparse_pca'):
                    loadings_ = np.dot(dc[:, signal_mask] - mean, factors)
                else:
                    loadings_ = sk.transform(dc[:, signal_mask])
                target.loadings = loadings_
            if reproject in ('signal', 'both'):
                if algorithm not in ('nmf', 'sparse_pca',
                                     'mini_batch_sparse_pca'):
                    factors = np.dot(np.linalg.pinv(loadings),
                                     dc[navigation_mask, :] - mean).T
                    target.factors = factors
                else:
                    messages.information("Reprojecting the signal is not yet "
                                         "supported for this algorithm")
                    if reproject == 'both':
                        reproject = 'signal'
                    else:
                        reproject = None

            # Rescale the results if the noise was normalized
            if normalize_poissonian_noise is True:
                target.factors[:] *= self._root_bH.T
                target.loadings[:] *= self._root_aG

            # Set the pixels that were not processed to nan
            if not isinstance(signal_mask, slice):
                # Store the (inverted, as inputed) signal mask
                target.signal_mask = ~signal_mask.reshape(
                    self.axes_manager._signal_shape_in_array)
                if reproject not in ('both', 'signal'):
                    factors = np.zeros((dc.shape[-1], target.factors.shape[1]))
                    factors[signal_mask, :] = target.factors
                    factors[~signal_mask, :] = np.nan
                    target.factors = factors
            if not isinstance(navigation_mask, slice):
                # Store the (inverted, as inputed) navigation mask
                target.navigation_mask = ~navigation_mask.reshape(
                    self.axes_manager._navigation_shape_in_array)
                if reproject not in ('both', 'navigation'):
                    loadings = np.zeros(
                        (dc.shape[0], target.loadings.shape[1]))
                    loadings[navigation_mask, :] = target.loadings
                    loadings[~navigation_mask, :] = np.nan
                    target.loadings = loadings
        finally:
            # undo any pre-treatments
            self.undo_treatments()

            if self._unfolded4decomposition is True:
                self.fold()
                self._unfolded4decomposition = False
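
A hedged usage sketch of the method above (the signal variable `s` is
hypothetical):

# Plain SVD with Poissonian-noise normalization, keeping 10 components.
s.decomposition(normalize_poissonian_noise=True,
                algorithm='svd',
                output_dimension=10)

# MLPCA needs an explicit output_dimension; a callable var_func is applied
# to the data itself to build var_array (variance ~ mean for Poissonian
# counts).
s.decomposition(algorithm='mlpca',
                output_dimension=10,
                var_func=lambda x: 1.0 * x)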
Example #29
0
File: io.py Project: csb60/hyperspy
def load(filenames=None, record_by=None, signal_type=None, 
         stack=False, mmap=False, mmap_dir=None, **kwds):
    """
    Load potentially multiple supported file into an hyperspy structure
    Supported formats: HDF5, msa, Gatan dm3, Ripple (rpl+raw)
    FEI ser and emi and hdf5, tif and a number of image formats.
    
    Any extra keyword is passed to the corresponsing reader. For 
    available options see their individual documentation.
    
    Parameters
    ----------
    filenames :  None, str or list of strings
        The filename to be loaded. If None, a window will open to select
        a file to load. If a valid filename is passed in that single
        file is loaded. If multiple file names are passed in
        a list, a list of objects or a single object containing the data
        of the individual files stacked are returned. This behaviour is
        controlled by the `stack` parameter (see bellow). Multiple
        files can be loaded by using simple shell-style wildcards, 
        e.g. 'my_file*.msa' loads all the files that starts
        by 'my_file' and has the '.msa' extension.
    record_by : None | 'spectrum' | 'image' 
        Manually set the way in which the data will be read. Possible
        values are 'spectrum' or 'image'.
    signal_type : str
        Manually set the signal type of the data. Although only setting
        the signal type to 'EELS' will currently change the way the data is
        loaded, it is good practice to set this parameter so it can be
        stored when saving the file. Please note that, if the
        signal_type is already defined in the file, the information
        will be overridden without warning.
    stack : bool
        If True and multiple filenames are passed in, stacking all
        the data into a single object is attempted. All files must match
        in shape. It is possible to store the data in a memory mapped
        temporary file instead of in memory by setting mmap_mode.
        
    mmap: bool
        If True and stack is True, then the data is stored
        in a memory-mapped temporary file. The memory-mapped data is
        stored on disk, and not directly loaded into memory.  
        Memory mapping is especially useful for accessing small 
        fragments of large files without reading the entire file into 
        memory.
    mmap_dir : string
        If mmap_dir is not None, and stack and mmap are True, the memory
        mapped file will be created in the given directory,
        otherwise the default directory is used.
        
    Returns
    -------
    Signal instance or list of signal instances

    Examples
    --------
    Loading a single file providing the signal type:
    
    >>> d = load('file.dm3', signal_type='XPS')
    
    Loading a single file and overriding its default record_by:
    
    >>> d = load('file.dm3', record_by='Image')
    
    Loading multiple files:
    
    >>> d = load('file1.dm3','file2.dm3')
    
    Loading multiple files matching the pattern:
    
    >>> d = load('file*.dm3')

    """
    if filenames is None:
        if hyperspy.defaults_parser.preferences.General.interactive is True:
            load_ui = Load()
            load_ui.edit_traits()
            if load_ui.filename:
                filenames = load_ui.filename
        else:
            raise ValueError("No file provided to reader and "
            "interactive mode is disabled")
        if filenames is None:
            raise ValueError("No file provided to reader")
        
    if isinstance(filenames, basestring):
        filenames = natsorted([f for f in glob.glob(filenames)
                               if os.path.isfile(f)])
        if not filenames:
            raise ValueError('No file name matches this pattern')
    elif not isinstance(filenames, (list, tuple)):
        raise ValueError(
            'The filenames parameter must be a list, tuple, string or None')
    if not filenames:
        raise ValueError('No file provided to reader.')
    else:
        if len(filenames) > 1:
            messages.information('Loading individual files')
        if stack is True:
            original_shape = None
            for i, filename in enumerate(filenames):
                obj = load_single_file(filename, output_level=0, **kwds)
                if original_shape is None:
                    original_shape = obj.data.shape
                    record_by = obj.mapped_parameters.record_by
                    stack_shape = (len(filenames),) + original_shape
                    tempf = None
                    if mmap is False:
                        data = np.empty(stack_shape,
                                        dtype=obj.data.dtype)
                    else:
                        tempf = tempfile.NamedTemporaryFile(
                            dir=mmap_dir)
                        data = np.memmap(tempf,
                                         dtype=obj.data.dtype,
                                         mode='w+',
                                         shape=stack_shape)
                    signal = type(obj)(
                        {'data': data})
                    # Store the temporary file in the signal class to
                    # avoid its deletion when garbage collecting
                    if tempf is not None:
                        signal._data_temporary_file = tempf
                    signal.axes_manager.axes[1:] = obj.axes_manager.axes
                    signal.axes_manager._set_axes_index_in_array_from_position()
                    eaxis = signal.axes_manager.axes[0]
                    eaxis.name = 'stack_element'
                    eaxis.navigate = True
                    signal.mapped_parameters = obj.mapped_parameters
                    signal.mapped_parameters.original_filename = ''
                    signal.mapped_parameters.title = \
                        os.path.split(os.path.split(
                            os.path.abspath(filenames[0]))[0])[1]
                    signal.original_parameters = DictionaryBrowser({})
                    signal.original_parameters.add_node('stack_elements')
                if obj.data.shape != original_shape:
                    raise IOError(
                        "Only files with data of the same shape can be "
                        "stacked")

                signal.data[i, ...] = obj.data
                signal.original_parameters.stack_elements.add_node(
                    'element%i' % i)
                node = signal.original_parameters.stack_elements[
                    'element%i' % i]
                node.original_parameters = \
                    obj.original_parameters.as_dictionary()
                node.mapped_parameters = \
                    obj.mapped_parameters.as_dictionary()
                del obj
            messages.information('Individual files loaded correctly')
            print signal
            objects = [signal,]
        else:
            objects = [load_single_file(filename, output_level=0, **kwds)
                       for filename in filenames]
            
        if hyperspy.defaults_parser.preferences.General.plot_on_load:
            for obj in objects:
                obj.plot()
        if len(objects) == 1:
            objects = objects[0]
    return objects
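
The stacking branch above preallocates one disk-backed array for the whole
stack and fills it one file at a time. A minimal sketch of that
memory-mapping idea outside HyperSpy (shapes and values are hypothetical):

import tempfile

import numpy as np

n_files = 3
frame_shape = (64, 64)  # stand-in for the per-file data shape
tempf = tempfile.NamedTemporaryFile()
stack = np.memmap(tempf, dtype='float32', mode='w+',
                  shape=(n_files,) + frame_shape)
for i in range(n_files):
    # Stand-in for load_single_file(filename).data
    stack[i, ...] = np.full(frame_shape, float(i), dtype='float32')
# Only the pages being touched need to live in RAM at any one time.
print(stack[:, 0, 0])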
Example #30
0
def load(filenames=None,
         record_by=None,
         signal_type=None,
         signal_origin=None,
         stack=False,
         stack_axis=None,
         new_axis_name="stack_element",
         mmap=False,
         mmap_dir=None,
         **kwds):
    """
    Load potentially multiple supported file into an hyperspy structure
    Supported formats: HDF5, msa, Gatan dm3, Ripple (rpl+raw)
    FEI ser and emi and hdf5, tif and a number of image formats.

    Any extra keyword is passed to the corresponsing reader. For
    available options see their individual documentation.

    Parameters
    ----------
    filenames :  None, str or list of strings
        The filename to be loaded. If None, a window will open to select
        a file to load. If a valid filename is passed in that single
        file is loaded. If multiple file names are passed in
        a list, a list of objects or a single object containing the data
        of the individual files stacked are returned. This behaviour is
        controlled by the `stack` parameter (see bellow). Multiple
        files can be loaded by using simple shell-style wildcards,
        e.g. 'my_file*.msa' loads all the files that starts
        by 'my_file' and has the '.msa' extension.
    record_by : {None, 'spectrum', 'image', ""}
        The value provided may determine the Signal subclass assigned to the
        data.
        If None, the value is read or guessed from the file. Any other value
        overrides the value stored in the file if any.
        If "spectrum" load the data in a Spectrum (sub)class.
        If "image" load the data in an Image (sub)class.
        If "" (empty string) load the data in a Signal class.

    signal_type : {None, "EELS", "EDS_TEM", "EDS_SEM", "", str}
        The acronym that identifies the signal type.
        The value provided may determine the Signal subclass assigned to the
        data.
        If None the value is read/guessed from the file. Any other value
        overrides the value stored in the file if any.
        For electron energy-loss spectroscopy use "EELS".
        For energy dispersive x-rays use "EDS_TEM"
        if acquired from an electron-transparent sample, as is usually
        the case in a transmission electron microscope (TEM), or
        "EDS_SEM" if acquired from a non electron-transparent sample,
        as is usually the case in a scanning electron microscope (SEM).
        If "" (empty string) the value is not read from the file and is
        considered undefined.
    signal_origin : {None, "experiment", "simulation", ""}
        Defines the origin of the signal.
        The value provided may determine the Signal subclass assigned to the
        data.
        If None the value is read/guessed from the file. Any other value
        overrides the value stored in the file if any.
        Use "experiment" if loading experimental data.
        Use "simulation" if loading simulated data.
        If "" (empty string) the value is not read from the file and is
        considered undefined.
    stack : bool
        If True and multiple filenames are passed in, stacking all
        the data into a single object is attempted. All files must match
        in shape. It is possible to store the data in a memory mapped
        temporary file instead of in memory by setting mmap_mode. The title
        is set to the name of the folder containing the files.
    stack_axis : {None, int, str}
        If None, the signals are stacked over a new axis. The data must
        have the same dimensions. Otherwise the
        signals are stacked over the axis given by its integer index or
        its name. The data must have the same shape, except in the dimension
        corresponding to `axis`.
    new_axis_name : string
        The name of the new axis when `axis` is None.
        If an axis with this name already exists, '-i' is appended
        automatically, where `i` is an integer, until a name that is not
        yet in use is found.

    mmap: bool
        If True and stack is True, then the data is stored
        in a memory-mapped temporary file. The memory-mapped data is
        stored on disk, and not directly loaded into memory.
        Memory mapping is especially useful for accessing small
        fragments of large files without reading the entire file into
        memory.
    mmap_dir : string
        If mmap_dir is not None, and stack and mmap are True, the memory
        mapped file will be created in the given directory,
        otherwise the default directory is used.

    Returns
    -------
    Signal instance or list of signal instances

    Examples
    --------
    Loading a single file providing the signal type:

    >>> d = load('file.dm3', signal_type='EDS_TEM')

    Loading a single file and overriding its default record_by:

    >>> d = load('file.dm3', record_by='Image')

    Loading multiple files:

    >>> d = load('file1.dm3','file2.dm3')

    Loading multiple files matching the pattern:

    >>> d = load('file*.dm3')

    """
    kwds['record_by'] = record_by
    kwds['signal_type'] = signal_type
    kwds['signal_origin'] = signal_origin
    if filenames is None:
        if hyperspy.defaults_parser.preferences.General.interactive is True:
            from hyperspy.gui.tools import Load
            load_ui = Load()
            load_ui.edit_traits()
            if load_ui.filename:
                filenames = load_ui.filename
        else:
            raise ValueError("No file provided to reader and "
                             "interactive mode is disabled")
        if filenames is None:
            raise ValueError("No file provided to reader")

    if isinstance(filenames, basestring):
        filenames = natsorted([f for f in glob.glob(filenames)
                               if os.path.isfile(f)])
        if not filenames:
            raise ValueError('No file name matches this pattern')
    elif not isinstance(filenames, (list, tuple)):
        raise ValueError(
            'The filenames parameter must be a list, tuple, string or None')
    if not filenames:
        raise ValueError('No file provided to reader.')
    else:
        if len(filenames) > 1:
            messages.information('Loading individual files')
        if stack is True:
            signal = []
            for i, filename in enumerate(filenames):
                obj = load_single_file(filename,
                                       **kwds)
                signal.append(obj)
            signal = hyperspy.utils.stack(signal,
                                          axis=stack_axis,
                                          new_axis_name=new_axis_name,
                                          mmap=mmap, mmap_dir=mmap_dir)
            signal.metadata.General.title = \
                os.path.split(
                    os.path.split(
                        os.path.abspath(filenames[0])
                    )[0]
                )[1]
            messages.information('Individual files loaded correctly')
            signal._print_summary()
            objects = [signal, ]
        else:
            objects = [load_single_file(filename,
                                        **kwds)
                       for filename in filenames]

        if hyperspy.defaults_parser.preferences.Plot.plot_on_load:
            for obj in objects:
                obj.plot()
        if len(objects) == 1:
            objects = objects[0]
    return objects
Example #31
0
    def decomposition(self,
                      normalize_poissonian_noise=False,
                      algorithm='svd',
                      output_dimension=None,
                      centre=None,
                      auto_transpose=True,
                      navigation_mask=None,
                      signal_mask=None,
                      var_array=None,
                      var_func=None,
                      polyfit=None,
                      reproject=None,
                      **kwargs):
        """Decomposition with a choice of algorithms

        The results are stored in self.learning_results

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise
            
        algorithm : 'svd' | 'fast_svd' | 'mlpca' | 'fast_mlpca' | 'nmf' |
            'sparse_pca' | 'mini_batch_sparse_pca'
        
        output_dimension : None or int
            number of components to keep/calculate
            
        centre : None | 'variables' | 'trials'
            If None, no centring is applied. If 'variables', the centring
            will be performed in the variable axis. If 'trials', the
            centring will be performed in the 'trials' axis. It only has
            effect when using the svd or fast_svd algorithms.
        
        auto_transpose : bool
            If True, automatically transposes the data to boost performance.
            Only has effect when using the svd or fast_svd algorithms.
            
        navigation_mask : boolean numpy array
            The navigation locations marked as True are not used in the
            decomposition.
        
        signal_mask : boolean numpy array
            The signal locations marked as True are not used in the 
            decomposition.
            
        var_array : numpy array
            Array of variance for the maximum likelihood PCA algorithm
            
        var_func : function or numpy array
            If a function, it will be applied to the dataset to obtain the
            var_array. Alternatively, it can be an array with the
            coefficients of a polynomial.
            
        polyfit : numpy array
            Coefficients of a polynomial, passed to np.polyval to build
            var_array when var_func is given but not callable.
        
        reproject : None | 'signal' | 'navigation' | 'both'
            If not None, the results of the decomposition will be projected
            onto the selected masked area.


        See also
        --------
        plot_decomposition_factors, plot_decomposition_loadings, plot_lev

        """
        # Check if it is the wrong data type
        if self.data.dtype.char not in ['e', 'f', 'd']: # If not float
            messages.warning(
                'To perform a decomposition the data must be of the float type.'
                ' You can change the type using the change_dtype method'
                ' e.g. s.change_dtype(\'float64\')\n'
                'Nothing done.')
            return
        # backup the original data
        self._data_before_treatments = self.data.copy()

        if algorithm == 'mlpca':
            if normalize_poissonian_noise is True:
                messages.warning(
                    "It makes no sense to do normalize_poissonian_noise "
                    "with the MLPCA algorithm. Therefore, "
                    "normalize_poissonian_noise is set to False")
                normalize_poissonian_noise = False
            if output_dimension is None:
                messages.warning_exit("With the mlpca algorithm the "
                "output_dimension must be expecified")


        # Apply pre-treatments
        # Transform the data in a line spectrum
        self._unfolded4decomposition = self.unfold_if_multidim()
        try:
            if hasattr(navigation_mask, 'ravel'):
                navigation_mask = navigation_mask.ravel()

            if hasattr(signal_mask, 'ravel'):
                signal_mask = signal_mask.ravel()

            # Normalize the poissonian noise
            # TODO this function can change the masks and this can cause
            # problems when reprojecting
            if normalize_poissonian_noise is True:
                self.normalize_poissonian_noise(
                    navigation_mask=navigation_mask,
                    signal_mask=signal_mask)
            messages.information('Performing decomposition analysis')

            dc = self.data
            # Set the output target (peak results or not?)
            target = self.learning_results
            
            # Transform the None masks in slices to get the right behaviour
            if navigation_mask is None:
                navigation_mask = slice(None)
            else:
                navigation_mask = ~navigation_mask
            if signal_mask is None:
                signal_mask = slice(None)
            else:
                signal_mask = ~signal_mask
                
            # WARNING: signal_mask and navigation_mask values are now their
            # negations, i.e. True -> False and vice versa. However, the
            # stored value (at the end of the method) coincides with the
            # input masks
            
            # Reset the explained_variance which is not set by all the 
            # algorithms
            explained_variance = None
            explained_variance_ratio = None
            mean = None
            
            if algorithm == 'svd':
                factors, loadings, explained_variance, mean = svd_pca(
                    dc[:, signal_mask][navigation_mask, :], centre=centre,
                    auto_transpose=auto_transpose)

            elif algorithm == 'fast_svd':
                factors, loadings, explained_variance, mean = svd_pca(
                    dc[:,signal_mask][navigation_mask,:],
                    fast=True,
                    output_dimension=output_dimension,
                    centre=centre,
                    auto_transpose=auto_transpose)

            elif algorithm == 'sklearn_pca':
                if sklearn_installed is False:
                    raise ImportError(
                    'sklearn is not installed. Nothing done')
                sk = sklearn.decomposition.PCA(**kwargs)
                sk.n_components = output_dimension
                loadings = sk.fit_transform((
                    dc[:,signal_mask][navigation_mask,:]))
                factors = sk.components_.T
                explained_variance = sk.explained_variance_
                mean = sk.mean_
                centre = 'trials'   

            elif algorithm == 'nmf':
                if sklearn_installed is False:
                    raise ImportError(
                    'sklearn is not installed. Nothing done')
                sk = sklearn.decomposition.NMF(**kwargs)
                sk.n_components = output_dimension
                loadings = sk.fit_transform((
                    dc[:,signal_mask][navigation_mask,:]))
                factors = sk.components_.T
                
            elif algorithm == 'sparse_pca':
                if sklearn_installed is False:
                    raise ImportError(
                    'sklearn is not installed. Nothing done')
                sk = sklearn.decomposition.SparsePCA(
                    output_dimension, **kwargs)
                loadings = sk.fit_transform(
                    dc[:,signal_mask][navigation_mask,:])
                factors = sk.components_.T
                
            elif algorithm == 'mini_batch_sparse_pca':
                if sklearn_installed is False:
                    raise ImportError(
                    'sklearn is not installed. Nothing done')
                sk = sklearn.decomposition.MiniBatchSparsePCA(
                    output_dimension, **kwargs)
                loadings = sk.fit_transform(
                    dc[:,signal_mask][navigation_mask,:])
                factors = sk.components_.T

            elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
                print "Performing the MLPCA training"
                if output_dimension is None:
                    messages.warning_exit(
                        "For MLPCA it is mandatory to define the "
                        "output_dimension")
                if var_array is None and var_func is None:
                    messages.information('No variance array provided. '
                                         'Assuming Poissonian data')
                    var_array = dc[:, signal_mask][navigation_mask, :]

                if var_array is not None and var_func is not None:
                    messages.warning_exit(
                        "You have defined both the var_func and var_array "
                        "keywords. Please, define just one of them")
                if var_func is not None:
                    if hasattr(var_func, '__call__'):
                        var_array = var_func(
                            dc[signal_mask,...][:,navigation_mask])
                    else:
                        try:
                            var_array = np.polyval(
                                polyfit, dc[signal_mask, navigation_mask])
                        except:
                            messages.warning_exit(
                                'var_func must be either a function or an '
                                'array defining the coefficients of a '
                                'polynomial')
                if algorithm == 'mlpca':
                    fast = False
                else:
                    fast = True
                U, S, V, Sobj, ErrFlag = mlpca(
                    dc[:, signal_mask][navigation_mask, :],
                    var_array, output_dimension, fast=fast)
                loadings = U * S
                factors = V
                explained_variance_ratio = S ** 2 / Sobj
                explained_variance = S ** 2 / len(factors)
            else:
                raise ValueError('Algorithm not recognised. '
                                     'Nothing done')

            # We must calculate the ratio here because otherwise the sum
            # information can be lost if the user calls
            # crop_decomposition_dimension
            if explained_variance is not None and \
                    explained_variance_ratio is None:
                explained_variance_ratio = \
                    explained_variance / explained_variance.sum()
                    
            # Store the results in learning_results
            target.factors = factors
            target.loadings = loadings
            target.explained_variance = explained_variance
            target.explained_variance_ratio = explained_variance_ratio
            target.decomposition_algorithm = algorithm
            target.poissonian_noise_normalized = \
                normalize_poissonian_noise
            target.output_dimension = output_dimension
            target.unfolded = self._unfolded4decomposition
            target.centre = centre
            target.mean = mean
            

            if output_dimension and factors.shape[1] != output_dimension:
                target.crop_decomposition_dimension(output_dimension)
            
            # Delete the unmixing information, because it'll refer to a
            # previous decomposition
            target.unmixing_matrix = None
            target.bss_algorithm = None

            if self._unfolded4decomposition is True:
                folding = \
                    self.mapped_parameters._internal_parameters.folding
                target.original_shape = folding.original_shape

            # Reproject
            if mean is None:
                mean = 0
            if reproject in ('navigation', 'both'):
                if algorithm not in ('nmf', 'sparse_pca', 
                                      'mini_batch_sparse_pca'):
                    loadings_ = np.dot(dc[:,signal_mask] - mean, factors)
                else:
                    loadings_ = sk.transform(dc[:,signal_mask])
                target.loadings = loadings_
            if reproject in ('signal', 'both'):
                if algorithm not in ('nmf', 'sparse_pca',
                                      'mini_batch_sparse_pca'):
                    factors = np.dot(np.linalg.pinv(loadings), 
                                     dc[navigation_mask,:] - mean).T
                    target.factors = factors
                else:
                    messages.information("Reprojecting the signal is not yet "
                                         "supported for this algorithm")
                    if reproject == 'both':
                        reproject = 'signal'
                    else:
                        reproject = None
            
            # Rescale the results if the noise was normalized
            if normalize_poissonian_noise is True:
                target.factors[:] *= self._root_bH.T
                target.loadings[:] *= self._root_aG
                
            # Set the pixels that were not processed to nan
            if not isinstance(signal_mask, slice):
                # Store the (inverted, as inputed) signal mask 
                target.signal_mask = ~signal_mask.reshape(
                    self.axes_manager._signal_shape_in_array)
                if reproject not in ('both', 'signal'):
                    factors = np.zeros((dc.shape[-1], target.factors.shape[1]))
                    factors[signal_mask, :] = target.factors
                    factors[~signal_mask, :] = np.nan
                    target.factors = factors
            if not isinstance(navigation_mask, slice):
                # Store the (inverted, as inputed) navigation mask
                target.navigation_mask = ~navigation_mask.reshape(
                    self.axes_manager._navigation_shape_in_array)
                if reproject not in ('both', 'navigation'):
                    loadings = np.zeros(
                        (dc.shape[0], target.loadings.shape[1]))
                    loadings[navigation_mask, :] = target.loadings
                    loadings[~navigation_mask, :] = np.nan
                    target.loadings = loadings
        finally:
            #undo any pre-treatments
            self.undo_treatments()
            
            if self._unfolded4decomposition is True:
                self.fold()
                self._unfolded4decomposition = False
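A minimal, self-contained sketch of the reprojection algebra used above, on made-up data (the names X, mean and factors are illustrative, not from the source): projecting the centred data onto the factors gives the loadings, and the pseudo-inverse of the loadings recovers the factors.

# Sketch only: illustrates the reprojection step with random data.
import numpy as np

X = np.random.random((100, 50))    # navigation x signal data matrix
mean = X.mean(0)                   # per-channel mean (0 if not centred)
# Suppose a decomposition produced 5 factors (signal x components):
factors = np.linalg.svd(X - mean, full_matrices=False)[2][:5].T

# Reproject the navigation dimension: loadings = (X - mean) . factors
loadings = np.dot(X - mean, factors)

# Reproject the signal dimension: factors = pinv(loadings) . (X - mean)
factors_re = np.dot(np.linalg.pinv(loadings), X - mean).T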
Example #32
0
import os.path
import shutil
from hyperspy import messages

config_files = list()
data_path = os.sep.join([os.path.dirname(__file__), '..', 'data'])

if os.name == 'posix':
    config_path = os.path.join(os.path.expanduser('~'), '.hyperspy')
    os_name = 'posix'
elif os.name in ['nt', 'dos']:
##    appdata = os.environ['APPDATA']
    config_path = os.path.expanduser('~/.hyperspy')
##    if os.path.isdir(appdata) is False:
##        os.mkdir(appdata)
##    config_path = os.path.join(os.environ['APPDATA'], 'hyperspy')
    os_name = 'windows'
else:
    messages.warning_exit('Unsupported operating system: %s' % os.name)

if os.path.isdir(config_path) is False:
    messages.information("Creating config directory: %s" % config_path)
    os.mkdir(config_path)

for file in config_files:
    templates_file = os.path.join(data_path, file)
    config_file = os.path.join(config_path, file)
    if os.path.isfile(config_file) is False:
        messages.information("Setting configuration file: %s" % file)
        shutil.copy(templates_file, config_file)
Example #33
0
            program_files = os.environ['PROGRAMFILES(X86)']
            gos_path = os.path.join(program_files, gos)
            if os.path.isdir(gos_path) is False:
                gos_path = os.path.join(config_path, 'EELS_GOS')
    else:
        gos_path = os.path.join(config_path, 'EELS_GOS')
    return gos_path


if os.path.isfile(defaults_file):
    # Remove the config file if it is obsolete
    f = open(defaults_file)
    if 'Not really' in f.readline():
        # It is the old config file
        f.close()
        messages.information('Removing obsolete config file')
        os.remove(defaults_file)
        defaults_file_exists = False
    else:
        defaults_file_exists = True
else:
    defaults_file_exists = False

# Defaults template definition starts#####################################
# This "section" is all that has to be modified to add or remove sections and
# options from the defaults


class GeneralConfig(t.HasTraits):
    default_file_format = t.Enum(
        'hdf5',
Example #34
0
import os.path
import shutil
from hyperspy import messages

config_files = ['hyperspyrc', 'edges_db.csv']
data_path = os.sep.join([os.path.dirname(__file__), '..', 'data'])

if os.name == 'posix':
    config_path = os.path.join(os.path.expanduser('~'), '.hyperspy')
    os_name = 'posix'
elif os.name in ['nt', 'dos']:
    ##    appdata = os.environ['APPDATA']
    config_path = os.path.expanduser('~/.hyperspy')
    ##    if os.path.isdir(appdata) is False:
    ##        os.mkdir(appdata)
    ##    config_path = os.path.join(os.environ['APPDATA'], 'hyperspy')
    os_name = 'windows'
else:
    messages.warning_exit('Unsupported operating system: %s' % os.name)

if os.path.isdir(config_path) is False:
    messages.information("Creating config directory: %s" % config_path)
    os.mkdir(config_path)

for file in config_files:
    templates_file = os.path.join(data_path, file)
    config_file = os.path.join(config_path, file)
    if os.path.isfile(config_file) is False:
        messages.information("Setting configuration file: %s" % file)
        shutil.copy(templates_file, config_file)
Example #35
0
            program_files = os.environ["PROGRAMFILES(X86)"]
            gos_path = os.path.join(program_files, gos)
            if os.path.isdir(gos_path) is False:
                gos_path = os.path.join(config_path, "EELS_GOS")
    else:
        gos_path = os.path.join(config_path, "EELS_GOS")
    return gos_path


if os.path.isfile(defaults_file):
    # Remove the config file if it is obsolete
    f = open(defaults_file)
    if "Not really" in f.readline():
        # It is the old config file
        f.close()
        messages.information("Removing obsoleted config file")
        os.remove(defaults_file)
        defaults_file_exists = False
    else:
        defaults_file_exists = True
else:
    defaults_file_exists = False

# Defaults template definition starts#####################################
# This "section" is all that has to be modified to add or remove sections and
# options from the defaults


class GeneralConfig(t.HasTraits):
    default_file_format = t.Enum(
        "hdf5",
Example #36
0
    def multifit(self, mask=None, charge_only_fixed=False,
                 autosave=False, autosave_every=10, **kwargs):
        """Fit the data to the model at all the positions of the
        navigation dimensions.

        Parameters
        ----------
        mask : {None, numpy.array}
            To mask (do not fit) at certain positions pass a numpy.array
            of type bool where True indicates that the data will not be
            fitted at the given position.
        charge_only_fixed : bool
            If True, only the fixed parameter values will be updated
            when changing the position.
        autosave : bool
            If True, the result of the fit will be saved automatically
            with a frequency defined by autosave_every.
        autosave_every : int
            Save the result of fitting every given number of spectra.
        **kwargs : key word arguments
            Any extra key word argument will be passed to
            the fit method. See the fit method documentation for
            a list of valid arguments.

        See Also
        --------
        fit

        """
        if autosave is not False:
            fd, autosave_fn = tempfile.mkstemp(
                prefix='hyperspy_autosave-',
                dir='.', suffix='.npz')
            os.close(fd)
            autosave_fn = autosave_fn[:-4]
            messages.information(
                "Autosaving every %s pixels to %s.npz" % (autosave_every,
                                                          autosave_fn))
            messages.information(
                "When multifit finishes its job the file will be deleted")
        if mask is not None and \
                (mask.shape != tuple(self.axes_manager.navigation_shape)):
            messages.warning_exit(
                "The mask must be a numpy array of boolean type with "
                "the same shape as the navigation: %s" %
                self.axes_manager.navigation_shape)
        masked_elements = 0 if mask is None else mask.sum()
        maxval = self.axes_manager.navigation_size - masked_elements
        if maxval > 0:
            pbar = progressbar.progressbar(maxval=maxval)
        if 'bounded' in kwargs and kwargs['bounded'] is True:
            if kwargs['fitter'] == 'mpfit':
                self.set_mpfit_parameters_info()
                kwargs['bounded'] = None
            elif kwargs['fitter'] in ("tnc", "l_bfgs_b"):
                self.set_boundaries()
                kwargs['bounded'] = None
            else:
                messages.information(
                    "The chosen fitter does not support bounding. "
                    "If you require bounding please select one of the "
                    "following fitters instead: mpfit, tnc, l_bfgs_b")
                kwargs['bounded'] = False
        i = 0
        for index in self.axes_manager:
            if mask is None or not mask[index]:
                self.fit(**kwargs)
                i += 1
                if maxval > 0:
                    pbar.update(i)
            if autosave is True and i % autosave_every == 0:
                self.save_parameters2file(autosave_fn)
        if maxval > 0:
            pbar.finish()
        if autosave is True:
            messages.information(
                'Deleting the temporary file %s' % (
                    autosave_fn + '.npz'))
            os.remove(autosave_fn + '.npz')
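A hedged usage sketch for multifit: the model object m, its navigation shape and the mask are hypothetical, and how m is built depends on the rest of the API; only the keyword pattern follows the signature above.

# Sketch only: assume m is an already-built model whose navigation
# shape is (64, 64).
import numpy as np

mask = np.zeros((64, 64), dtype=bool)
mask[:, :10] = True  # True marks positions that will not be fitted

# Fit everywhere else with a bounded fitter, autosaving every 50 spectra.
m.multifit(mask=mask, fitter='mpfit', bounded=True,
           autosave=True, autosave_every=50)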
Example #37
0
    def normalize_poissonian_noise(self, navigation_mask = None,
                                   signal_mask = None, return_masks = False):
        """
        Scales the SI following Surf. Interface Anal. 2004; 36: 203–212 to
        "normalize" the poissonian data for PCA analysis

        Parameters
        ----------
        navigation_mask : boolen numpy array
        signal_mask  : boolen numpy array
        """
        messages.information(
            "Scaling the data to normalize the (presumably) Poissonian noise")
        # If energy axis is not first, it needs to be for MVA.
        refold = self.unfold_if_multidim()
        dc = self.data.T.squeeze().copy()
        navigation_mask = \
            self._correct_navigation_mask_when_unfolded(navigation_mask)
        if navigation_mask is None:
            navigation_mask = slice(None)
        if signal_mask is None:
            signal_mask = slice(None)
        # Rescale the data to Gaussianize the Poissonian noise
        aG = dc[signal_mask,:][:,navigation_mask].sum(0).squeeze()
        bH = dc[signal_mask,:][:,navigation_mask].sum(1).squeeze()
        # Checks if any is negative
        if (aG < 0).any() or (bH < 0).any():
            messages.warning_exit(
            "Data error: negative values\n"
            "Are you sure that the data follow a poissonian distribution?")
        # Update the spatial and energy masks so they do not include rows
        # or columns that sum to zero.
        aG0 = (aG == 0)
        bH0 = (bH == 0)
        if aG0.any():
            if isinstance(navigation_mask, slice):
                # Convert the slice into a mask before setting its values
                navigation_mask = np.ones((self.data.shape[1]), dtype='bool')
            # Mask columns that sum to zero
            navigation_mask[aG0] = False
            aG = aG[~aG0]
        if bH0.any():
            if isinstance(signal_mask, slice):
                # Convert the slice into a mask before setting its values
                signal_mask = np.ones((self.data.shape[0]), dtype='bool')
            # Mask rows that sum to zero
            signal_mask[bH0] = False
            bH = bH[~bH0]
        self._root_aG = np.sqrt(aG)[np.newaxis,:]
        self._root_bH = np.sqrt(bH)[:, np.newaxis]
        temp = (dc[signal_mask,:][:,navigation_mask] /
                (self._root_aG * self._root_bH))
        if (isinstance(signal_mask, slice) or
                isinstance(navigation_mask, slice)):
            dc[signal_mask,navigation_mask] = temp
        else:
            mask3D = signal_mask[:, np.newaxis] * \
                navigation_mask[np.newaxis, :]
            dc[mask3D] = temp.ravel()
        # TODO - dc was never modifying self.data - was normalization ever
        # really getting applied?  Comment next lines as necessary.
        self.data = dc.T.copy()
        # end normalization write to self.data.
        if refold is True:
            print "Automatically refolding the SI after scaling"
            self.fold()
        if return_masks is True:
            if isinstance(navigation_mask, slice):
                navigation_mask = None
            if isinstance(signal_mask, slice):
                signal_mask = None
            return navigation_mask, signal_mask
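A minimal, self-contained sketch of the scaling above on hypothetical count data (no masking): each element is divided by the square roots of its column and row sums, following the same aG/bH construction as the method, which approximately equalizes the Poissonian variance before PCA.

# Sketch only: Poissonian-noise normalization on a small random
# counts matrix (signal x navigation).
import numpy as np

dc = np.random.poisson(lam=20, size=(8, 5)).astype(float)

aG = dc.sum(0)                       # per-column (navigation) sums
bH = dc.sum(1)                       # per-row (signal) sums
root_aG = np.sqrt(aG)[np.newaxis, :]
root_bH = np.sqrt(bH)[:, np.newaxis]

dc_normalized = dc / (root_aG * root_bH)
# Undoing the scaling on decomposition results multiplies the factors
# by root_bH.T and the loadings by root_aG, as in the code above.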
Example #38
0
    def decomposition(self, normalize_poissonian_noise=False,
                      algorithm='svd', output_dimension=None, centre=None,
                      auto_transpose=True, navigation_mask=None,
                      signal_mask=None, var_array=None, var_func=None,
                      polyfit=None, on_peaks=False, reproject=None,
                      **kwargs):
        """Decomposition with a choice of algorithms

        The results are stored in self.mva_results

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise
            
        algorithm : 'svd' | 'fast_svd' | 'mlpca' | 'fast_mlpca' | 'nmf' |
            'sparse_pca' | 'mini_batch_sparse_pca'
        
        output_dimension : None or int
            number of components to keep/calculate
            
        centre : None | 'variables' | 'trials'
            If None, no centring is applied. If 'variables', the centring
            is performed in the variable axis. If 'trials', the centring
            is performed in the 'trials' axis. It only has effect when
            using the svd or fast_svd algorithms.
        
        auto_transpose : bool
            If True, automatically transposes the data to boost performance.
            Only has effect when using the svd or fast_svd algorithms.
            
        navigation_mask : boolean numpy array
        
        signal_mask : boolean numpy array
            
        var_array : numpy array
            Array of variance for the maximum likelihood PCA algorithm
            
        var_func : function or numpy array
            If a function, it will be applied to the dataset to obtain
            var_array. Alternatively, it can be an array with the
            coefficients of a polynomial.
            
        polyfit : numpy array
            Coefficients of a polynomial, passed to np.polyval to
            estimate var_array from the data.
        
        reproject : None | 'signal' | 'navigation' | 'both'
            If not None, the results of the decomposition will be
            projected onto the selected masked area.


        See also
        --------
        plot_decomposition_factors, plot_decomposition_scores, plot_lev

        """
        # backup the original data
        if on_peaks:
            if hasattr(self.mapped_parameters,'peak_chars'):
                self._data_before_treatments = \
                    self.mapped_parameters.peak_chars.copy()
            else:
                print("No peak characteristics found. You must run the "
                      "peak_char_stack function to obtain these before "
                      "you can run PCA or ICA on them.")
        else:
            self._data_before_treatments = self.data.copy()

        if algorithm == 'mlpca':
            if normalize_poissonian_noise is True:
                messages.warning(
                "It makes no sense to do normalize_poissonian_noise with "
                "the MLPCA algorithm. Therefore, "
                "normalize_poissonian_noise is set to False")
                normalize_poissonian_noise = False
            if output_dimension is None:
                messages.warning_exit("With the mlpca algorithm the "
                "output_dimension must be expecified")


        # Apply pre-treatments
        # Transform the data into a line spectrum
        self._unfolded4decomposition = self.unfold_if_multidim()
        if hasattr(navigation_mask, 'ravel'):
            navigation_mask = navigation_mask.ravel()

        if hasattr(signal_mask, 'ravel'):
            signal_mask = signal_mask.ravel()

        # Normalize the poissonian noise
        # TODO this function can change the masks and this can cause
        # problems when reprojecting
        if normalize_poissonian_noise is True:
            if reproject is None:
                navigation_mask, signal_mask = \
                    self.normalize_poissonian_noise(
                                            navigation_mask=navigation_mask,
                                            signal_mask=signal_mask,
                                            return_masks = True)
            elif reproject == 'both':
                _, _ = \
                    self.normalize_poissonian_noise(return_masks = True)  
            elif reproject == 'navigation':
                _, signal_mask = \
                    self.normalize_poissonian_noise(return_masks = True,
                                                    signal_mask=signal_mask,) 
            elif reproject == 'signal':
                navigation_mask, _ = \
                    self.normalize_poissonian_noise(return_masks = True,
                                            navigation_mask=navigation_mask,)         
            
        messages.information('Performing decomposition analysis')
        if on_peaks:
            dc = self.mapped_parameters.peak_chars
        else:
            # The data must be transposed both for Images and Spectra
            dc = self.data
            
        # Set the output target (peak results or not)
        target = self._get_target(on_peaks)
        
        # Transform the None masks in slices to get the right behaviour
        if navigation_mask is None:
            navigation_mask = slice(None)
        if signal_mask is None:
            signal_mask = slice(None)
        
        # Reset the explained_variance which is not set by all the algorithms
        explained_variance = None
        explained_variance_ratio = None
        mean = None
        
        if algorithm == 'svd':
            factors, scores, explained_variance, mean = svd_pca(
                dc[:,signal_mask][navigation_mask,:], centre = centre,
                auto_transpose = auto_transpose)

        elif algorithm == 'fast_svd':
            factors, scores, explained_variance, mean = svd_pca(
                dc[:,signal_mask][navigation_mask,:],
            fast = True, output_dimension = output_dimension, centre = centre,
                auto_transpose = auto_transpose)

        elif algorithm == 'sklearn_pca':    
            sk = sklearn.decomposition.PCA(**kwargs)
            sk.n_components = output_dimension
            scores = sk.fit_transform((dc[:,signal_mask][navigation_mask,:]))
            factors = sk.components_.T
            explained_variance = sk.explained_variance_
            mean = sk.mean_
            centre = 'trials'   

        elif algorithm == 'nmf':    
            sk = sklearn.decomposition.NMF(**kwargs)
            sk.n_components = output_dimension
            scores = sk.fit_transform((dc[:,signal_mask][navigation_mask,:]))
            factors = sk.components_.T
            
        elif algorithm == 'sparse_pca':
            sk = sklearn.decomposition.SparsePCA(output_dimension, **kwargs)
            scores = sk.fit_transform(dc[:,signal_mask][navigation_mask,:])
            factors = sk.components_.T
            
        elif algorithm == 'mini_batch_sparse_pca':
            sk = sklearn.decomposition.MiniBatchSparsePCA(output_dimension,
                                                            **kwargs)
            scores = sk.fit_transform(dc[:,signal_mask][navigation_mask,:])
            factors = sk.components_.T

        elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
            print "Performing the MLPCA training"
            if output_dimension is None:
                messages.warning_exit(
                "For MLPCA it is mandatory to define the output_dimension")
            if var_array is None and var_func is None:
                messages.information('No variance array provided. '
                'Assuming Poissonian data')
                var_array = dc[:,signal_mask][navigation_mask,:]

            if var_array is not None and var_func is not None:
                messages.warning_exit(
                "You have defined both the var_func and var_array "
                "keywords. Please define just one of them")
            if var_func is not None:
                if hasattr(var_func, '__call__'):
                    var_array = var_func(dc[signal_mask,...][:,navigation_mask])
                else:
                    try:
                        var_array = np.polyval(
                            polyfit, dc[signal_mask, navigation_mask])
                    except:
                        messages.warning_exit(
                        'var_func must be either a function or an array '
                        'defining the coefficients of a polynomial')
            if algorithm == 'mlpca':
                fast = False
            else:
                fast = True
            U, S, V, Sobj, ErrFlag = mlpca(
                dc[:,signal_mask][navigation_mask,:],
                var_array, output_dimension, fast = fast)
            scores = U * S
            factors = V
            explained_variance_ratio = S ** 2 / Sobj
            explained_variance = S ** 2 / len(factors)
        else:
            messages.information('Error: Algorithm not recognised. '
                                 'Nothing done')
            return False

        # We must calculate the ratio here because otherwise the sum
        # information can be lost if the user calls
        # crop_decomposition_dimension
        if explained_variance is not None and explained_variance_ratio is None:
            explained_variance_ratio = \
                explained_variance / explained_variance.sum()
                
        # Store the results in mva_results
        target.factors = factors
        target.scores = scores
        target.explained_variance = explained_variance
        target.explained_variance_ratio = explained_variance_ratio
        target.decomposition_algorithm = algorithm
        target.poissonian_noise_normalized = \
            normalize_poissonian_noise
        target.output_dimension = output_dimension
        target.unfolded = self._unfolded4decomposition
        target.centre = centre
        target.mean = mean
        

        if output_dimension and factors.shape[1] != output_dimension:
            target.crop_decomposition_dimension(output_dimension)
        
        # Delete the unmixing information, because it refers to a
        # previous decomposition
        target.unmixing_matrix = None
        target.ica_algorithm = None

        if self._unfolded4decomposition is True:
            target.original_shape = self._shape_before_unfolding

        # Reproject
        if mean is None:
            mean = 0
        if reproject in ('navigation', 'both'):
            if algorithm not in ('nmf', 'sparse_pca', 'mini_batch_sparse_pca'):
                scores_ = np.dot(dc[:,signal_mask] - mean, factors)
            else:
                scores_ = sk.transform(dc[:,signal_mask])
            target.scores = scores_
        if reproject in ('signal', 'both'):
            if algorithm not in ('nmf', 'sparse_pca', 'mini_batch_sparse_pca'):
                factors = np.dot(np.linalg.pinv(scores), 
                                 dc[navigation_mask,:] - mean).T
                target.factors = factors
            else:
                messages.information("Reprojecting the signal is not yet "
                                     "supported for this algorithm")
                if reproject == 'both':
                    reproject = 'signal'
                else:
                    reproject = None
        
        # Rescale the results if the noise was normalized
        if normalize_poissonian_noise is True:
            target.factors[:] *= self._root_bH.T
            target.scores[:] *= self._root_aG
            
        # Set the pixels that were not processed to nan
        if not isinstance(signal_mask, slice):
            target.signal_mask = signal_mask
            if reproject not in ('both', 'signal'):
                factors = np.zeros((dc.shape[-1], target.factors.shape[1]))
                factors[signal_mask, :] = target.factors
                factors[~signal_mask, :] = np.nan
                target.factors = factors
        if not isinstance(navigation_mask, slice):
            target.navigation_mask = navigation_mask
            if reproject not in ('both', 'navigation'):
                scores = np.zeros((dc.shape[0], target.scores.shape[1]))
                scores[navigation_mask, :] = target.scores
                scores[~navigation_mask, :] = np.nan
                target.scores = scores

        # Undo any pre-treatments
        self.undo_treatments(on_peaks)
        
        if self._unfolded4decomposition is True:
            self.fold()
            self._unfolded4decomposition = False
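A hedged usage sketch of the decomposition call: the signal s is hypothetical and only the keyword pattern follows the signature above.

# Sketch only: assume s is an already-loaded spectrum image.
s.decomposition(normalize_poissonian_noise=True,
                algorithm='svd',
                output_dimension=10,
                reproject='both')
# The results are stored in s.mva_results (factors, scores,
# explained_variance, explained_variance_ratio, ...).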
Example #39
0
from hyperspy.io_plugins import (msa, digital_micrograph, fei, mrc,
                                 ripple, tiff)
io_plugins = [msa, digital_micrograph, fei, mrc, ripple, tiff]
try:
    from hyperspy.io_plugins import netcdf
    io_plugins.append(netcdf)
except ImportError:
    pass
    # NetCDF is obsolete and is only provided for users who have
    # old EELSLab files. Therefore, we print no message if it is not
    # available
    #~ messages.information('The NetCDF IO features are not available')

try:
    from hyperspy.io_plugins import hdf5
    io_plugins.append(hdf5)
except ImportError:
    messages.warning('The HDF5 IO features are not available. '
                     'It is highly recommended to install h5py')

try:
    from hyperspy.io_plugins import image
    io_plugins.append(image)
except ImportError:
    messages.information('The Image (PIL) IO features are not available')

default_write_ext = set()
for plugin in io_plugins:
    if plugin.writes:
        default_write_ext.add(
            plugin.file_extensions[plugin.default_extension])
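A minimal sketch of the module-level attributes the loop above reads from each IO plugin; the attribute names come from the loop itself, while the 'xyz' extension is made up.

# Sketch only: hypothetical contents of an io_plugins module.
writes = True                       # the plugin can write files
file_extensions = ['xyz', 'XYZ']    # supported extensions
default_extension = 0               # index into file_extensions

# Given these attributes, the loop above adds
# file_extensions[default_extension], i.e. 'xyz', to default_write_ext.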