Example #1
def load_single_file(filename, record_by=None, **kwds):
    """
    Load any supported file into an Hyperspy structure
    Supported formats: netCDF, msa, Gatan dm3, Ripple (rpl+raw)
    FEI ser and emi and hdf5.

    Parameters
    ----------

    filename : string
        File name (including the extension)
    record_by : {None, 'spectrum', 'image'}
        If None (default) it will try to guess the data type from the file,
        if 'spectrum' the file will be loaded as an Spectrum object
        If 'image' the file will be loaded as an Image object
    """
    extension = os.path.splitext(filename)[1][1:]

    i = 0
    while extension not in io_plugins[i].file_extensions and \
            i < len(io_plugins) - 1:
        i += 1
    if extension not in io_plugins[i].file_extensions:
        # No plugin claims this extension: try to load it with the
        # Python Imaging Library instead
        reader = image
        try:
            return load_with_reader(filename, reader, record_by, **kwds)
        except Exception:
            messages.warning_exit('File type not supported')
    else:
        reader = io_plugins[i]
        return load_with_reader(filename, reader, record_by, **kwds)
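
For context, a hypothetical call (the file names are placeholders and HyperSpy's io machinery must be importable, so this is a sketch rather than standalone runnable code):

# The '.msa' extension selects the msa plugin from io_plugins
s = load_single_file('spectrum.msa')

# Force the data to be loaded as an Image regardless of the guess
im = load_single_file('stack.dm3', record_by='image')
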
Example #2
def _plot_loading(loadings, idx, axes_manager, ax=None,
                  comp_label='PC', no_nans=True, calibrate=True,
                  cmap=plt.cm.gray):
    if ax is None:
        ax = plt.gca()
    if no_nans:
        loadings = np.nan_to_num(loadings)
    if axes_manager.navigation_dimension == 2:
        extent = None
        # get calibration from a passed axes_manager
        shape = axes_manager._navigation_shape_in_array
        if calibrate:
            extent = (axes_manager._axes[0].low_value,
                      axes_manager._axes[0].high_value,
                      axes_manager._axes[1].high_value,
                      axes_manager._axes[1].low_value)
        im = ax.imshow(loadings[idx].reshape(shape), cmap=cmap, extent=extent,
                       interpolation='nearest')
        div = make_axes_locatable(ax)
        cax = div.append_axes("right", size="5%", pad=0.05)
        plt.colorbar(im, cax=cax)
    elif axes_manager.navigation_dimension == 1:
        if calibrate:
            x = axes_manager._axes[0].axis
        else:
            x = np.arange(axes_manager._axes[0].size)
        ax.step(x, loadings[idx])
    else:
        messages.warning_exit('View not supported')
Example #3
def svd_pca(data, fast=False, output_dimension=None, centre=None, auto_transpose=True):
    """Perform PCA using SVD.

    Parameters
    ----------
    data : numpy array
        MxN array of input data (M variables, N trials)
    fast : bool
        Whether to use randomized SVD to estimate a limited number of
        components given by output_dimension
    output_dimension : int
        Number of components to estimate when fast is True
    centre : None | 'variables' | 'trials'
        If None, no centring is applied. If 'variables', the centring is
        performed along the variables axis. If 'trials', along the
        'trials' axis.
    auto_transpose : bool
        If True, automatically transposes the data to boost performance

    Returns
    -------

    factors : numpy array
    loadings : numpy array
    explained_variance : numpy array
    mean : numpy array or None (if centre is None)
    """
    N, M = data.shape
    if centre is not None:
        if centre == "variables":
            mean = data.mean(1)[:, np.newaxis]
        elif centre == "trials":
            mean = data.mean(0)[np.newaxis, :]
        else:
            raise ValueError("centre must be one of: None, variables, trials")
        data -= mean
    else:
        mean = None
    if auto_transpose is True:
        if N < M:
            print("Auto transposing the data")
            data = data.T
        else:
            auto_transpose = False
    if fast is True and sklearn_installed is True:
        if output_dimension is None:
            messages.warning_exit("When using fast_svd it is necessary to " "define the output_dimension")
        U, S, V = fast_svd(data, output_dimension)
    else:
        U, S, V = scipy.linalg.svd(data, full_matrices=False)
    if auto_transpose is False:
        factors = V.T
        explained_variance = S ** 2 / N
        loadings = U * S
    else:
        loadings = V.T
        explained_variance = S ** 2 / N
        factors = U * S
    return factors, loadings, explained_variance, mean
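
The centring and SVD steps above can be exercised on synthetic data; the following is a minimal, self-contained sketch of the same computation in plain numpy/scipy (the matrix sizes and component count are arbitrary choices, not values from the source):

import numpy as np
import scipy.linalg

# Synthetic data: 200 trials of 50 variables built from 3 latent components
rng = np.random.RandomState(0)
data = np.dot(rng.standard_normal((200, 3)), rng.standard_normal((3, 50)))

# Equivalent of the centre='trials' branch followed by a thin SVD
mean = data.mean(0)[np.newaxis, :]
U, S, V = scipy.linalg.svd(data - mean, full_matrices=False)
explained_variance = S ** 2 / data.shape[0]

# Only the 3 latent components carry significant variance
print(np.round(explained_variance[:5], 3))
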
Example #4
 def txt_to_fine_structure_coeff(self, filename):
     fs = np.loadtxt(filename)
     self._calculate_knots()
     if len(fs) == len(self.__knots):
         self.fine_structure_coeff.value = fs
     else:
         messages.warning_exit("The provided fine structure file "
                               "doesn't match the size of the current fine structure")
Example #5
    def plot(self, axes_manager=None):
        if self._plot is not None:
            try:
                self._plot.close()
            except:
                # If it was already closed it will raise an exception,
                # but we want to carry on...
                pass

        if axes_manager is None:
            axes_manager = self.axes_manager

        if axes_manager.signal_dimension == 1:
            # Hyperspectrum

            self._plot = mpl_hse.MPL_HyperSpectrum_Explorer()
            self._plot.spectrum_data_function = self.__call__
            self._plot.spectrum_title = self.mapped_parameters.name
            self._plot.xlabel = '%s (%s)' % (
                self.axes_manager._slicing_axes[0].name,
                self.axes_manager._slicing_axes[0].units)
            self._plot.ylabel = 'Intensity'
            self._plot.axes_manager = axes_manager
            self._plot.axis = self.axes_manager._slicing_axes[0].axis

            # Image properties
            if self.axes_manager._non_slicing_axes:
                self._plot.image_data_function = self._get_explorer
                self._plot.image_title = ''
                self._plot.pixel_size = \
                    self.axes_manager._non_slicing_axes[0].scale
                self._plot.pixel_units = \
                    self.axes_manager._non_slicing_axes[0].units
            self._plot.plot()

        elif axes_manager.signal_dimension == 2:

            # Mike's playground with new plotting toolkits - needs to be a
            # branch.
            """
            if len(self.data.shape)==2:
                from drawing.guiqwt_hie import image_plot_2D
                image_plot_2D(self)

            import drawing.chaco_hie
            self._plot = drawing.chaco_hie.Chaco_HyperImage_Explorer(self)
            self._plot.configure_traits()
            """
            self._plot = mpl_hie.MPL_HyperImage_Explorer()
            self._plot.image_data_function = self.__call__
            self._plot.navigator_data_function = self._get_explorer
            self._plot.axes_manager = axes_manager
            self._plot.plot()

        else:
            messages.warning_exit('Plotting is not supported for this view')
Example #6
 def multifit(self, mask=None, fitter=None,
              charge_only_fixed=False, grad=False, autosave=False,
              autosave_every=10, bounded=False, **kwargs):
     
     if fitter is None:
         fitter = preferences.Model.default_fitter
         print('Fitter: %s' % fitter) 
     if autosave is not False:
         fd, autosave_fn = tempfile.mkstemp(prefix='hyperspy_autosave-',
                                            dir='.', suffix='.npz')
         os.close(fd)
         autosave_fn = autosave_fn[:-4]
         messages.information(
             "Autosaving every %s pixels to %s.npz" % (autosave_every,
                                                       autosave_fn))
         messages.information(
             "When multifit finishes its job the file will be deleted")
     if mask is not None and \
             (mask.shape != tuple(self.axes_manager.navigation_shape)):
         messages.warning_exit(
             "The mask must be an array with the same spatial dimensions "
             "as the navigation shape, %s" %
             self.axes_manager.navigation_shape)
     masked_elements = 0 if mask is None else mask.sum()
     pbar = progressbar.progressbar(
         maxval=(np.cumprod(self.axes_manager.navigation_shape)[-1] -
                 masked_elements))
     if bounded is True:
         if fitter == 'mpfit':
             self.set_mpfit_parameters_info()
             bounded = None
         elif fitter in ("tnc", "l_bfgs_b"):
             self.set_boundaries()
             bounded = None
         else:
             messages.information(
                 "The chosen fitter does not support bounding. "
                 "If you require bounding please select one of the "
                 "following fitters instead: mpfit, tnc, l_bfgs_b")
             bounded = False
     i = 0
     for index in np.ndindex(tuple(self.axes_manager.navigation_shape)):
         if mask is None or not mask[index]:
             self.axes_manager.set_not_slicing_indexes(index)
             self.charge(only_fixed=charge_only_fixed)
             self.fit(fitter=fitter, grad=grad, bounded=bounded,
                      **kwargs)
             i += 1
             pbar.update(i)
         if autosave is True and i % autosave_every == 0:
             self.save_parameters2file(autosave_fn)
     pbar.finish()
     if autosave is True:
         messages.information(
             'Deleting the temporary file %s' % (autosave_fn + '.npz'))
         os.remove(autosave_fn + '.npz')
Example #7
    def readgosfile(self): 
        element = self.__element
        # Convert to the "GATAN" nomenclature
        if self.__subshell == "K" :
            subshell = "K1"
        else:
            subshell = self.__subshell
        if element not in edges_dict:
            message = "The given element " + element + \
                " is not in the database."
            messages.warning_exit(message)
        elif subshell not in edges_dict[element]['subshells']:
            message = "The given subshell " + subshell + \
                " is not in the database.\nThe available subshells are:\n" + \
                str(edges_dict[element]['subshells'].keys())
            messages.warning_exit(message)
            
        self.edgeenergy = \
            edges_dict[element]['subshells'][subshell]['onset_energy']
        self.__subshell_factor = \
            edges_dict[element]['subshells'][subshell]['factor']
        print "\nLoading Hartree-Slater cross section from the Gatan tables"
        print "Element: ", element
        print "Subshell: ", subshell
        print "Onset Energy = ", self.edgeenergy
        # Read the file
        gos_file = os.path.join(
            defaults.GOS_dir,
            edges_dict[element]['subshells'][subshell]['filename'])
        f = open(gos_file)

        # Transfer the contents of the file to a list
        GosList = f.read().replace('\r', '').split()
        f.close()

        #Extract the parameters

        self.material = GosList[0]
        self.__info1_1 = float(GosList[2])
        self.__info1_2 = float(GosList[3])
        self.__info1_3 = float(GosList[4])
        self.__ncol    = int(GosList[5])
        self.__info2_1 = float(GosList[6])
        self.__info2_2 = float(GosList[7])
        self.__nrow    = int(GosList[8])
        self.__gos_array = np.array(GosList[9:]).reshape(
            self.__nrow, self.__ncol).astype(np.float64)
        
        # Calculate the scale of the matrix
        self.energyaxis = self.__info2_1 * (exp(
            np.linspace(0, self.__nrow - 1, self.__nrow) *
            self.__info2_2 / self.__info2_1) - 1.0)

        self.__qaxis = (self.__info1_1 * (exp(
            np.linspace(1, self.__ncol, self.__ncol) *
            self.__info1_2) - 1.0)) * 1.0e10
        self.__sqa0qaxis = (a0 * self.__qaxis) ** 2
        self.__logsqa0qaxis = log((a0 * self.__qaxis) ** 2)
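
The energy axis above is an exponential grid rebuilt from two header constants; a small runnable sketch of the same formula, with hypothetical values standing in for __info2_1, __info2_2 and __nrow:

import numpy as np

# Hypothetical header constants (not values from any real GOS file)
e_scale, e_step, nrow = 0.2, 0.05, 8

# Exponentially spaced axis starting at 0, as computed in readgosfile
energyaxis = e_scale * (np.exp(np.linspace(0, nrow - 1, nrow) *
                               e_step / e_scale) - 1.0)
print(energyaxis)  # spacing grows with energy; densest near the onset
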
Example #8
def _plot_loading(loadings, idx, axes_manager, ax=None,
                  comp_label=None, no_nans=True,
                  calibrate=True, cmap=plt.cm.gray,
                  same_window=False):
    if ax is None:
        ax = plt.gca()
    if no_nans:
        loadings = np.nan_to_num(loadings)
    axes = axes_manager.navigation_axes
    if axes_manager.navigation_dimension == 2:
        extent = None
        # get calibration from a passed axes_manager
        shape = axes_manager._navigation_shape_in_array
        if calibrate:
            extent = (axes[0].low_value,
                      axes[0].high_value,
                      axes[1].high_value,
                      axes[1].low_value)
        im = ax.imshow(loadings[idx].reshape(shape),
                       cmap=cmap, extent=extent,
                       interpolation='nearest')
        if calibrate:
            plt.xlabel(axes[0].units)
            plt.ylabel(axes[1].units)
        else:
            plt.xlabel('pixels')
            plt.ylabel('pixels')
        if comp_label:
            plt.title('%s %s' % (comp_label, idx))
        div = make_axes_locatable(ax)
        cax = div.append_axes("right", size="5%", pad=0.05)
        plt.colorbar(im, cax=cax)
    elif axes_manager.navigation_dimension == 1:
        if calibrate:
            x = axes[0].axis
        else:
            x = np.arange(axes[0].size)
        ax.step(x, loadings[idx],
                label='%s %s' % (comp_label, idx))
        if comp_label and not same_window:
            plt.title('%s %s' % (comp_label, idx))
        plt.ylabel('Score, Arb. Units')
        if calibrate:
            if axes[0].units is not Undefined:
                plt.xlabel(axes[0].units)
            else:
                plt.xlabel('depth')
        else:
            plt.xlabel('depth')
    else:
        messages.warning_exit('View not supported')
Example #9
def load(*filenames, **kwds):
    """
    Load potentially multiple supported file into an hyperspy structure
    Supported formats: netCDF, msa, Gatan dm3, Ripple (rpl+raw)
    FEI ser and emi and hdf5.

    *filenames : if multiple file names are passed in, they get aggregated to
    a Signal class that has members for each file, plus a data set that
    consists of stacked input files. That stack has one dimension more than
    the input files. All files must match in size, number of dimensions, and
    type/extension.

    *kwds : any specified parameters.  Currently, the only interesting one
    here is data_type, to manually force the outcome Signal to a particular
    type.

    Example usage:
        Loading a single file:
            d=load('file.dm3')
        Loading a single file and overriding its default data_type:
            d=load('file.dm3',data_type='Image')
        Loading multiple files:
            d=load('file1.dm3','file2.dm3')

    """

    if len(filenames) < 1:
        messages.warning_exit('No file provided to reader.')
        return None
    elif len(filenames) == 1:
        if '*' in filenames[0]:
            from glob import glob
            filenames = glob(filenames[0])
            print filenames
        else:
            return load_single_file(filenames[0], **kwds)
    import hyperspy.signals.aggregate as agg
    objects = [load_single_file(filename, **kwds) for filename in filenames]

    obj_type = objects[0].__class__.__name__
    if obj_type == 'Image':
        if len(objects[0].data.shape) == 3:
            # feeding 3d objects creates cell stacks
            agg_sig = agg.AggregateCells(*objects)
        else:
            agg_sig = agg.AggregateImage(*objects)
    elif 'Spectrum' in obj_type:
        agg_sig = agg.AggregateSpectrum(*objects)
    else:
        agg_sig = agg.Aggregate(*objects)
    return agg_sig
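
A hypothetical session illustrating the aggregation path (the file names are placeholders and the files must exist, so this is a sketch, not standalone code):

# A wildcard is expanded with glob and the matches are aggregated into
# a Signal with one extra (stacking) dimension
d = load('map_*.dm3')

# Passing several names explicitly behaves the same way
d = load('file1.dm3', 'file2.dm3')
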
Example #10
def file_reader(filename, *args, **kwds):
    if no_netcdf is True:
        messages.warning_exit(no_netcdf_message)

    ncfile = Dataset(filename, "r")

    if hasattr(ncfile, "file_format_version"):
        if ncfile.file_format_version == "EELSLab 0.1":
            dictionary = nc_hyperspy_reader_0dot1(ncfile, filename, *args, **kwds)
    else:
        ncfile.close()
        messages.warning_exit("Unsupported netCDF file")

    return (dictionary,)
Example #11
    def normalize_poissonian_noise(self, navigation_mask=None,
                                   signal_mask=None):
        """
        Scales the SI following Surf. Interface Anal. 2004; 36: 203–212 
        to "normalize" the poissonian data for decomposition analysis

        Parameters
        ----------
        navigation_mask : boolen numpy array
        signal_mask  : boolen numpy array
        """
        messages.information(
            "Scaling the data to normalize the (presumably)"
            " Poissonian noise")
        refold = self.unfold_if_multidim()
        dc = self.data
        if navigation_mask is None:
            navigation_mask = slice(None)
        else:
            navigation_mask = ~navigation_mask.ravel()
        if signal_mask is None:
            signal_mask = slice(None)
        else:
            signal_mask = ~signal_mask
        # Rescale the data to gaussianize the Poissonian noise
        aG = dc[:, signal_mask][navigation_mask, :].sum(1).squeeze()
        bH = dc[:, signal_mask][navigation_mask, :].sum(0).squeeze()
        # Check for negative values
        if (aG < 0).any() or (bH < 0).any():
            messages.warning_exit(
                "Data error: negative values\n"
                "Are you sure that the data follow a Poissonian "
                "distribution?")

        self._root_aG = np.sqrt(aG)[:, np.newaxis]
        self._root_bH = np.sqrt(bH)[np.newaxis, :]
        # Temporarily disable numpy's warning when the result of an
        # operation produces nans
        old_settings = np.seterr(invalid='ignore')
        dc[:, signal_mask][navigation_mask, :] /= (self._root_aG *
                                                   self._root_bH)
        # Restore the previous numpy error settings
        np.seterr(**old_settings)
        # Set the nans resulting from 0/0 to zero
        dc[:, signal_mask][navigation_mask, :] = \
            np.nan_to_num(dc[:, signal_mask][navigation_mask, :])
        
        if refold is True:
            print "Automatically refolding the SI after scaling"
            self.fold()
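
The scaling above divides every element by the square roots of its row and column sums (the aG and bH vectors); a self-contained numpy sketch of that weighting on a synthetic Poissonian matrix (all sizes and names are hypothetical):

import numpy as np

rng = np.random.RandomState(0)
# Rows with very different count rates, as in real spectrum images
lam = rng.uniform(5.0, 500.0, size=(40, 1))
dc = rng.poisson(lam * np.ones((1, 60))).astype(float)

aG = dc.sum(1)  # per-navigation-position totals (rows)
bH = dc.sum(0)  # per-signal-channel totals (columns)

# Divide each element by sqrt(row total) * sqrt(column total)
scaled = dc / (np.sqrt(aG)[:, np.newaxis] * np.sqrt(bH)[np.newaxis, :])

# After scaling, the per-row noise levels are comparable
print(scaled.std(1).round(4))
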
Example #12
def save(filename, signal, format='hdf5', **kwds):
    extension = os.path.splitext(filename)[1][1:]
    i = 0
    if extension == '':
        extension = format
        filename = filename + '.' + format
    while extension not in io_plugins[i].file_extensions and \
            i < len(io_plugins) - 1:
        i += 1
    if extension not in io_plugins[i].file_extensions:
        messages.warning_exit('File type not supported')
    else:
        writer = io_plugins[i]
        # Check if the writer can write
        writer.file_writer(filename, signal, **kwds)
Example #13
    def split_in(self, axis, number_of_parts=None, steps=None):
        """Splits the data

        The split can be defined either by the `number_of_parts` or by the
        `steps` size.

        Parameters
        ----------
        number_of_parts : int or None
            Number of parts in which the SI will be splitted
        steps : int or None
            Size of the splitted parts
        axis : int
            The splitting axis

        Return
        ------
        tuple with the splitted signals
        """
        axis = self._get_positive_axis_index_index(axis)
        if number_of_parts is None and steps is None:
            if not self._splitting_steps:
                messages.warning_exit(
                "Please provide either number_of_parts or a steps list")
            else:
                steps = self._splitting_steps
                print "Splitting in ", steps
        elif number_of_parts is not None and steps is not None:
            print "Using the given steps list. number_of_parts dimissed"
        splitted = []
        shape = self.data.shape

        if steps is None:
            rounded = (shape[axis] - (shape[axis] % number_of_parts))
            step = rounded / number_of_parts
            cut_node = range(0, rounded+step, step)
        else:
            cut_node = np.array([0] + steps).cumsum()
        for i in xrange(len(cut_node)-1):
            data = self.data[
                (slice(None), ) * axis +
                (slice(cut_node[i], cut_node[i + 1]), Ellipsis)]
            s = Signal({'data': data})
            # TODO: When copying plotting does not work
#            s.axes = copy.deepcopy(self.axes_manager)
            s.get_dimensions_from_data()
            splitted.append(s)
        return splitted
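
The cut points are easy to inspect outside the class; a runnable sketch with hypothetical sizes:

import numpy as np

# number_of_parts branch: round the axis length down to a multiple
axis_size, number_of_parts = 10, 3
rounded = axis_size - (axis_size % number_of_parts)
step = rounded // number_of_parts
print(list(range(0, rounded + step, step)))  # [0, 3, 6, 9] -> 3 parts

# steps branch: the part sizes become cumulative cut points
steps = [2, 5, 3]
print(np.array([0] + steps).cumsum())  # [ 0  2  7 10]
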
Example #14
    def split_in(self, axis, number_of_parts=None, steps=None):
        """Splits the data

        The split can be defined either by the `number_of_parts` or by the
        `steps` size.

        Parameters
        ----------
        number_of_parts : int or None
            Number of parts in which the SI will be splitted
        steps : int or None
            Size of the splitted parts
        axis : int
            The splitting axis

        Return
        ------
        tuple with the splitted signals
        """
        axis = self._get_positive_axis_index_index(axis)
        if number_of_parts is None and steps is None:
            if not self._splitting_steps:
                messages.warning_exit(
                    "Please provide either number_of_parts or a steps list")
            else:
                steps = self._splitting_steps
                print "Splitting in ", steps
        elif number_of_parts is not None and steps is not None:
            print "Using the given steps list. number_of_parts dimissed"
        splitted = []
        shape = self.data.shape

        if steps is None:
            rounded = (shape[axis] - (shape[axis] % number_of_parts))
            step = rounded / number_of_parts
            cut_node = range(0, rounded + step, step)
        else:
            cut_node = np.array([0] + steps).cumsum()
        for i in xrange(len(cut_node) - 1):
            data = self.data[(slice(None), ) * axis +
                             (slice(cut_node[i], cut_node[i + 1]), Ellipsis)]
            s = Signal({'data': data})
            # TODO: When copying plotting does not work
            #            s.axes = copy.deepcopy(self.axes_manager)
            s.get_dimensions_from_data()
            splitted.append(s)
        return splitted
Example #15
def file_reader(filename, *args, **kwds):
    if no_netcdf is True:
        raise ImportError("No netCDF library installed. "
            "To read EELSLab netcdf files install "
            "one of the following packages:"
            "netCDF4, netCDF3, netcdf, scientific")
    
    ncfile = Dataset(filename,'r')
    
    if hasattr(ncfile, 'file_format_version') and \
            ncfile.file_format_version == 'EELSLab 0.1':
        dictionary = nc_hyperspy_reader_0dot1(ncfile, filename, *args, **kwds)
    else:
        ncfile.close()
        messages.warning_exit('Unsupported netCDF file')
        
    return (dictionary,)
Example #16
def save(filename, signal, **kwds):
    extension = os.path.splitext(filename)[1][1:]
    i = 0
    if extension == '':
        extension = \
            hyperspy.defaults_parser.preferences.General.default_file_format
        filename = filename + '.' + \
            hyperspy.defaults_parser.preferences.General.default_file_format
    while extension not in io_plugins[i].file_extensions and \
            i < len(io_plugins) - 1:
        i += 1
    if extension not in io_plugins[i].file_extensions:
        messages.warning_exit('File type not supported')
    else:
        writer = io_plugins[i]
        # Check if the writer can write
        writer.file_writer(filename, signal, **kwds)
        print('The %s file was created' % filename)
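
A hypothetical call (it assumes a Signal object s and the registered io_plugins, so it is a sketch, not standalone code):

# No extension given: the default format from the preferences
# (e.g. 'hdf5') is appended and 'result.hdf5' is written
save('result', s)

# An explicit extension selects the matching plugin instead
save('result.msa', s)
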
Example #17
def file_reader(filename, *args, **kwds):
    if no_netcdf is True:
        raise ImportError("No netCDF library installed. "
                          "To read EELSLab netcdf files install "
                          "one of the following packages:"
                          "netCDF4, netCDF3, netcdf, scientific")

    ncfile = Dataset(filename, 'r')

    if hasattr(ncfile, 'file_format_version') and \
            ncfile.file_format_version == 'EELSLab 0.1':
        dictionary = nc_hyperspy_reader_0dot1(ncfile, filename, *args,
                                              **kwds)
    else:
        ncfile.close()
        messages.warning_exit('Unsupported netCDF file')

    return (dictionary, )
Example #18
    def __init__(self, element_subshell, intensity=1., delta=0.):
        # Check if the Peter Rez's Hartree Slater GOS distributed by Gatan 
        # are available. Otherwise exit
        if defaults.GOS_dir == 'None':
            messages.warning_exit(
                "The path to the GOS files could not be found.\n"
                "Please define a valid GOS folder location in the "
                "configuration file.")
        # Declare which are the "real" parameters
        Component.__init__(self, ['delta', 'intensity', 'fslist',
                                  'effective_angle'])
        self.name = element_subshell
        # Set initial values
        self.__element, self.__subshell = element_subshell.split('_')
        self.energy_scale = None
        self.T = None
        self.gamma = None
        self.convergence_angle = None
        self.collection_angle = None
        self.E0 = None
        self.effective_angle.value = 0
        self.effective_angle.free = False
        self.fs_state = defaults.fs_state
        self.fs_emax = defaults.fs_emax
        self.fs_mode = "new_spline"
        self.fslist.ext_force_positive = False
        
        self.delta.value = delta
        self.delta.free = False
        self.delta.ext_force_positive = False
        self.delta.grad = self.grad_delta
        self.freedelta = False
        self._previous_delta = delta
                                
        self.intensity.grad = self.grad_intensity
        self.intensity.value = intensity
        self.intensity.bmin = 0.
        self.intensity.bmax = None

        self.knots_factor = defaults.knots_factor

        # Set initial actions
        self.readgosfile()
Example #19
 def multifit(self,
              mask=None,
              fitter="leastsq",
              charge_only_fixed=False,
              grad=False,
              autosave=False,
              autosave_every=10,
              **kwargs):
     if autosave is not False:
         fd, autosave_fn = tempfile.mkstemp(prefix='hyperspy_autosave-',
                                            dir='.',
                                            suffix='.npz')
         os.close(fd)
         autosave_fn = autosave_fn[:-4]
         messages.information("Autosaving each %s pixels to %s.npz" %
                              (autosave_every, autosave_fn))
         messages.information(
             "When multifit finishes its job the file will be deleted")
     if mask is not None and \
             (mask.shape != tuple(self.axes_manager.navigation_shape)):
         messages.warning_exit(
             "The mask must be an array with the same spatial dimensions "
             "as the navigation shape, %s" %
             self.axes_manager.navigation_shape)
     masked_elements = 0 if mask is None else mask.sum()
     pbar = progressbar.progressbar(
         maxval=(np.cumprod(self.axes_manager.navigation_shape)[-1] -
                 masked_elements))
     i = 0
     for index in np.ndindex(tuple(self.axes_manager.navigation_shape)):
         if mask is None or not mask[index]:
             self.axes_manager.set_not_slicing_indexes(index)
             self.charge(only_fixed=charge_only_fixed)
             self.fit(fitter=fitter, grad=grad, **kwargs)
             i += 1
             pbar.update(i)
         if autosave is True and i % autosave_every == 0:
             self.save_parameters2file(autosave_fn)
     pbar.finish()
     if autosave is True:
         messages.information('Deleting the temporary file %s' %
                              (autosave_fn + '.npz'))
         os.remove(autosave_fn + '.npz')
Example #20
def load_single_file(filename, record_by=None, output_level=2,
                     signal_type=None, **kwds):
    """
    Load any supported file into an Hyperspy structure
    Supported formats: netCDF, msa, Gatan dm3, Ripple (rpl+raw)
    FEI ser and emi and hdf5.

    Parameters
    ----------

    filename : string
        File name (including the extension)
    record_by : {None, 'spectrum', 'image'}
        If None (default) it will try to guess the data type from the file,
        if 'spectrum' the file will be loaded as an Spectrum object
        If 'image' the file will be loaded as an Image object
    output_level : int
        If 0, do not output file loading text.
        If 1, output simple file summary (data type and shape)
        If 2, output more diagnostic output (e.g. number of tags for DM3 files)
    """
    extension = os.path.splitext(filename)[1][1:]

    i = 0
    while extension.lower() not in io_plugins[i].file_extensions and \
            i < len(io_plugins) - 1:
        i += 1
    if extension.lower() not in io_plugins[i].file_extensions:
        # No plugin claims this extension: try to load it with the
        # Python Imaging Library instead
        reader = image
        try:
            return load_with_reader(filename, reader, record_by,
                                    signal_type=signal_type, **kwds)
        except Exception:
            messages.warning_exit('File type not supported')
    else:
        reader = io_plugins[i]
        return load_with_reader(filename, reader, record_by,
                                signal_type=signal_type,
                                output_level=output_level, **kwds)
Example #21
 def multifit(self, mask = None, fitter = "leastsq", 
              charge_only_fixed = False, grad = False, autosave = False, 
              autosave_every = 10, **kwargs):
     if autosave is not False:
         fd, autosave_fn = tempfile.mkstemp(prefix='hyperspy_autosave-',
                                            dir='.', suffix='.npz')
         os.close(fd)
         autosave_fn = autosave_fn[:-4]
         messages.information(
             "Autosaving every %s pixels to %s.npz" % (autosave_every,
                                                       autosave_fn))
         messages.information(
             "When multifit finishes its job the file will be deleted")
     if mask is not None and \
             (mask.shape != tuple(self.axes_manager.navigation_shape)):
         messages.warning_exit(
             "The mask must be an array with the same spatial dimensions "
             "as the navigation shape, %s" %
             self.axes_manager.navigation_shape)
     masked_elements = 0 if mask is None else mask.sum()
     pbar = progressbar.progressbar(
         maxval=(np.cumprod(self.axes_manager.navigation_shape)[-1] -
                 masked_elements))
     i = 0
     for index in np.ndindex(tuple(self.axes_manager.navigation_shape)):
         if mask is None or not mask[index]:
             self.axes_manager.set_not_slicing_indexes(index)
             self.charge(only_fixed=charge_only_fixed)
             self.fit(fitter=fitter, grad=grad, **kwargs)
             i += 1
             pbar.update(i)
         if autosave is True and i % autosave_every == 0:
             self.save_parameters2file(autosave_fn)
     pbar.finish()
     if autosave is True:
         messages.information(
             'Deleting the temporary file %s' % (autosave_fn + '.npz'))
         os.remove(autosave_fn + '.npz')
Example #22
def _plot_loading(loadings,
                  idx,
                  axes_manager,
                  ax=None,
                  comp_label='PC',
                  no_nans=True,
                  calibrate=True,
                  cmap=plt.cm.gray):
    if ax is None:
        ax = plt.gca()
    if no_nans:
        loadings = np.nan_to_num(loadings)
    if axes_manager.navigation_dimension == 2:
        extent = None
        # get calibration from a passed axes_manager
        shape = axes_manager._navigation_shape_in_array
        if calibrate:
            extent = (axes_manager._axes[0].low_value,
                      axes_manager._axes[0].high_value,
                      axes_manager._axes[1].high_value,
                      axes_manager._axes[1].low_value)
        im = ax.imshow(loadings[idx].reshape(shape),
                       cmap=cmap,
                       extent=extent,
                       interpolation='nearest')
        div = make_axes_locatable(ax)
        cax = div.append_axes("right", size="5%", pad=0.05)
        plt.colorbar(im, cax=cax)
    elif axes_manager.navigation_dimension == 1:
        if calibrate:
            x = axes_manager._axes[0].axis
        else:
            x = np.arange(axes_manager._axes[0].size)
        ax.step(x, loadings[idx])
    else:
        messages.warning_exit('View not supported')
Example #23
 def multifit(self, mask=None, charge_only_fixed=False,
              autosave=False, autosave_every=10, **kwargs):
     """Fit the data to the model at all the positions of the 
     navigation dimensions.        
     
     Parameters
     ----------
     
     mask : {None, numpy.array}
         To mask (do not fit) at certain position pass a numpy.array
         of type bool where True indicates that the data will not be
         fitted at the given position.
     charge_only_fixed : bool
         If True, only the fixed parameters values will be updated
         when changing the positon.
     autosave : bool
         If True, the result of the fit will be saved automatically
         with a frequency defined by autosave_every.
     autosave_every : int
         Save the result of fitting every given number of spectra.
     
     **kwargs : key word arguments
         Any extra key word argument will be passed to 
         the fit method. See the fit method documentation for 
         a list of valid arguments.
         
     See Also
     --------
     fit
         
     """
     
     if autosave is not False:
         fd, autosave_fn = tempfile.mkstemp(
             prefix='hyperspy_autosave-',
             dir='.', suffix='.npz')
         os.close(fd)
         autosave_fn = autosave_fn[:-4]
         messages.information(
             "Autosaving every %s pixels to %s.npz" % (autosave_every,
                                                       autosave_fn))
         messages.information(
             "When multifit finishes its job the file will be deleted")
     if mask is not None and \
             (mask.shape != tuple(self.axes_manager.navigation_shape)):
         messages.warning_exit(
             "The mask must be a numpy array of boolean type with "
             "the same shape as the navigation: %s" %
             self.axes_manager.navigation_shape)
     masked_elements = 0 if mask is None else mask.sum()
     maxval = self.axes_manager.navigation_size - masked_elements
     if maxval > 0:
         pbar = progressbar.progressbar(maxval=maxval)
     if 'bounded' in kwargs and kwargs['bounded'] is True:
         if kwargs['fitter'] == 'mpfit':
             self.set_mpfit_parameters_info()
             kwargs['bounded'] = None
         elif kwargs['fitter'] in ("tnc", "l_bfgs_b"):
             self.set_boundaries()
             kwargs['bounded'] = None
         else:
             messages.information(
                 "The chosen fitter does not support bounding. "
                 "If you require bounding please select one of the "
                 "following fitters instead: mpfit, tnc, l_bfgs_b")
             kwargs['bounded'] = False
     i = 0
     for index in self.axes_manager:
         if mask is None or not mask[index]:
             self.fit(**kwargs)
             i += 1
             if maxval > 0:
                 pbar.update(i)
         if autosave is True and i % autosave_every == 0:
             self.save_parameters2file(autosave_fn)
     if maxval > 0:
         pbar.finish()
     if autosave is True:
         messages.information(
             'Deleting the temporary file %s' % (autosave_fn + '.npz'))
         os.remove(autosave_fn + '.npz')
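
A hypothetical call, assuming m is an already-built HyperSpy model for some signal (a sketch, not standalone code):

import numpy as np

# Skip one navigation position and checkpoint the parameters every
# 10 fitted spectra; mpfit is one of the fitters that supports bounds
mask = np.zeros(tuple(m.axes_manager.navigation_shape), dtype=bool)
mask[0, 0] = True
m.multifit(mask=mask, fitter='mpfit', bounded=True,
           autosave=True, autosave_every=10)
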
Example #24
def svd_pca(data,
            fast=False,
            output_dimension=None,
            centre=None,
            auto_transpose=True):
    """Perform PCA using SVD.

    Parameters
    ----------
    data : numpy array
        MxN array of input data (M variables, N trials)
    fast : bool
        Whether to use randomized SVD to estimate a limited number of
        components given by output_dimension
    output_dimension : int
        Number of components to estimate when fast is True
    centre : None | 'variables' | 'trials'
        If None, no centring is applied. If 'variables', the centring is
        performed along the variables axis. If 'trials', along the
        'trials' axis.
    auto_transpose : bool
        If True, automatically transposes the data to boost performance

    Returns
    -------

    factors : numpy array
    loadings : numpy array
    explained_variance : numpy array
    mean : numpy array or None (if centre is None)
    """
    N, M = data.shape
    if centre is not None:
        if centre == 'variables':
            mean = data.mean(1)[:, np.newaxis]
        elif centre == 'trials':
            mean = data.mean(0)[np.newaxis, :]
        else:
            raise ValueError(
                'centre must be one of: None, variables, trials')
        data -= mean
    else:
        mean = None
    if auto_transpose is True:
        if N < M:
            print("Auto transposing the data")
            data = data.T
        else:
            auto_transpose = False
    if fast is True and sklearn_installed is True:
        if output_dimension is None:
            messages.warning_exit('When using fast_svd it is necessary to '
                                  'define the output_dimension')
        U, S, V = fast_svd(data, output_dimension)
    else:
        U, S, V = scipy.linalg.svd(data, full_matrices=False)
    if auto_transpose is False:
        factors = V.T
        explained_variance = S**2 / N
        loadings = U * S
    else:
        loadings = V.T
        explained_variance = S**2 / N
        factors = U * S
    return factors, loadings, explained_variance, mean
Example #25
    def normalize_poissonian_noise(self, navigation_mask=None,
                                   signal_mask=None, return_masks=False):
        """
        Scales the SI following Surf. Interface Anal. 2004; 36: 203–212 to
        "normalize" the poissonian data for decomposition analysis

        Parameters
        ----------
        navigation_mask : boolen numpy array
        signal_mask  : boolen numpy array
        """
        messages.information(
            "Scaling the data to normalize the (presumably) Poissonian noise")
        refold = self.unfold_if_multidim()
        dc = self.data
        if navigation_mask is None:
            navigation_mask = slice(None)
        else:
            navigation_mask = navigation_mask.ravel()
        if signal_mask is None:
            signal_mask = slice(None)
        # Rescale the data to gaussianize the Poissonian noise
        aG = dc[:, signal_mask][navigation_mask, :].sum(1).squeeze()
        bH = dc[:, signal_mask][navigation_mask, :].sum(0).squeeze()
        # Check for negative values
        if (aG < 0).any() or (bH < 0).any():
            messages.warning_exit(
                "Data error: negative values\n"
                "Are you sure that the data follow a Poissonian "
                "distribution?")
        # Update the spatial and energy masks so they do not include
        # rows or columns that sum zero.
        aG0 = (aG == 0)
        bH0 = (bH == 0)
        if aG0.any():
            if isinstance(navigation_mask, slice):
                # Convert the slice into a mask before setting its values
                navigation_mask = np.ones((self.data.shape[0]), dtype='bool')
            # Mask the navigation positions that sum zero
            navigation_mask[aG0] = False
            aG = aG[~aG0]
        if bH0.any():
            if isinstance(signal_mask, slice):
                # Convert the slice into a mask before setting its values
                signal_mask = np.ones((self.data.shape[1]), dtype='bool')
            # Mask the signal channels that sum zero
            signal_mask[bH0] = False
            bH = bH[~bH0]
        self._root_aG = np.sqrt(aG)[:, np.newaxis]
        self._root_bH = np.sqrt(bH)[np.newaxis, :]
        dc[:, signal_mask][navigation_mask, :] = \
            (dc[:, signal_mask][navigation_mask, :] /
                (self._root_aG * self._root_bH))
        if refold is True:
            print "Automatically refolding the SI after scaling"
            self.fold()
        if return_masks is True:
            if isinstance(navigation_mask, slice):
                navigation_mask = None
            if isinstance(signal_mask, slice):
                signal_mask = None
            return navigation_mask, signal_mask
Example #26
    def decomposition(self, normalize_poissonian_noise=False,
                      algorithm='svd', output_dimension=None, centre=None,
                      auto_transpose=True, navigation_mask=None,
                      signal_mask=None, var_array=None, var_func=None,
                      polyfit=None, on_peaks=False, reproject=None,
                      **kwargs):
        """Decomposition with a choice of algorithms

        The results are stored in self.mva_results

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise
            
        algorithm : 'svd' | 'fast_svd' | 'sklearn_pca' | 'mlpca' |
            'fast_mlpca' | 'nmf' | 'sparse_pca' | 'mini_batch_sparse_pca'
        
        output_dimension : None or int
            number of components to keep/calculate
            
        centre : None | 'variables' | 'trials'
            If None, no centring is applied. If 'variables', the centring
            is performed along the variables axis. If 'trials', along the
            'trials' axis. It only has an effect when using the svd or
            fast_svd algorithms.

        auto_transpose : bool
            If True, automatically transposes the data to boost performance.
            Only has an effect when using the svd or fast_svd algorithms.
            
        navigation_mask : boolean numpy array
        
        signal_mask : boolean numpy array
            
        var_array : numpy array
            Array of variance for the maximum likelihood PCA algorithm
            
        var_func : function or numpy array
            If a function, it will be applied to the dataset to obtain the
            var_array. Alternatively, it can be an array with the
            coefficients of a polynomial.
            
        polyfit :
        
        reproject : None | 'signal' | 'navigation' | 'both'
            If not None, the results of the decomposition will be
            projected onto the selected masked area.


        See also
        --------
        plot_decomposition_factors, plot_decomposition_scores, plot_lev

        """
        # backup the original data
        if on_peaks:
            if hasattr(self.mapped_parameters,'peak_chars'):
                self._data_before_treatments = \
                    self.mapped_parameters.peak_chars.copy()
            else:
                print """No peak characteristics found.  You must run the 
                         peak_char_stack function to obtain these before 
                         you can run PCA or ICA on them."""
        else:
            self._data_before_treatments = self.data.copy()

        if algorithm == 'mlpca':
            if normalize_poissonian_noise is True:
                messages.warning(
                    "It makes no sense to do normalize_poissonian_noise "
                    "with the MLPCA algorithm. Therefore, "
                    "normalize_poissonian_noise is set to False")
                normalize_poissonian_noise = False
            if output_dimension is None:
                messages.warning_exit("With the mlpca algorithm the "
                                      "output_dimension must be specified")


        # Apply pre-treatments
        # Transform the data in a line spectrum
        self._unfolded4decomposition = self.unfold_if_multidim()
        if hasattr(navigation_mask, 'ravel'):
            navigation_mask = navigation_mask.ravel()

        if hasattr(signal_mask, 'ravel'):
            signal_mask = signal_mask.ravel()

        # Normalize the poissonian noise
        # TODO this function can change the masks and this can cause
        # problems when reprojecting
        if normalize_poissonian_noise is True:
            if reproject is None:
                navigation_mask, signal_mask = \
                    self.normalize_poissonian_noise(
                        navigation_mask=navigation_mask,
                        signal_mask=signal_mask,
                        return_masks=True)
            elif reproject == 'both':
                _, _ = self.normalize_poissonian_noise(return_masks=True)
            elif reproject == 'navigation':
                _, signal_mask = self.normalize_poissonian_noise(
                    return_masks=True, signal_mask=signal_mask)
            elif reproject == 'signal':
                navigation_mask, _ = self.normalize_poissonian_noise(
                    return_masks=True, navigation_mask=navigation_mask)
            
        messages.information('Performing decomposition analysis')
        if on_peaks:
            dc = self.mapped_parameters.peak_chars
        else:
            # The data must be transposed both for Images and Spectra
            dc = self.data
            
        #set the output target (peak results or not?)
        target = self._get_target(on_peaks)
        
        # Transform the None masks in slices to get the right behaviour
        if navigation_mask is None:
            navigation_mask = slice(None)
        if signal_mask is None:
            signal_mask = slice(None)
        
        # Reset the explained_variance which is not set by all the algorithms
        explained_variance = None
        explained_variance_ratio = None
        mean = None
        
        if algorithm == 'svd':
            factors, scores, explained_variance, mean = svd_pca(
                dc[:, signal_mask][navigation_mask, :], centre=centre,
                auto_transpose=auto_transpose)

        elif algorithm == 'fast_svd':
            factors, scores, explained_variance, mean = svd_pca(
                dc[:, signal_mask][navigation_mask, :], fast=True,
                output_dimension=output_dimension, centre=centre,
                auto_transpose=auto_transpose)

        elif algorithm == 'sklearn_pca':
            sk = sklearn.decomposition.PCA(**kwargs)
            sk.n_components = output_dimension
            scores = sk.fit_transform(dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T
            explained_variance = sk.explained_variance_
            mean = sk.mean_
            centre = 'trials'

        elif algorithm == 'nmf':
            sk = sklearn.decomposition.NMF(**kwargs)
            sk.n_components = output_dimension
            scores = sk.fit_transform(dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T

        elif algorithm == 'sparse_pca':
            sk = sklearn.decomposition.SparsePCA(output_dimension, **kwargs)
            scores = sk.fit_transform(dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T

        elif algorithm == 'mini_batch_sparse_pca':
            sk = sklearn.decomposition.MiniBatchSparsePCA(output_dimension,
                                                          **kwargs)
            scores = sk.fit_transform(dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T

        elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
            print "Performing the MLPCA training"
            if output_dimension is None:
                messages.warning_exit(
                    "For MLPCA it is mandatory to define the "
                    "output_dimension")
            if var_array is None and var_func is None:
                messages.information('No variance array provided. '
                                     'Supposing Poissonian data')
                var_array = dc[:, signal_mask][navigation_mask, :]

            if var_array is not None and var_func is not None:
                messages.warning_exit(
                    "You have defined both the var_func and var_array "
                    "keywords. Please, define just one of them")
            if var_func is not None:
                if hasattr(var_func, '__call__'):
                    var_array = var_func(
                        dc[signal_mask, ...][:, navigation_mask])
                else:
                    try:
                        var_array = np.polyval(
                            polyfit, dc[signal_mask, navigation_mask])
                    except:
                        messages.warning_exit(
                            'var_func must be either a function or an '
                            'array defining the coefficients of a '
                            'polynomial')
            if algorithm == 'mlpca':
                fast = False
            else:
                fast = True
            U, S, V, Sobj, ErrFlag = mlpca(
                dc[:, signal_mask][navigation_mask, :],
                var_array, output_dimension, fast=fast)
            scores = U * S
            factors = V
            explained_variance_ratio = S ** 2 / Sobj
            explained_variance = S ** 2 / len(factors)
        else:
            messages.information('Error: Algorithm not recognised. '
                                 'Nothing done')
            return False

        # We must calculate the ratio here, because otherwise the sum
        # information can be lost if the user calls
        # crop_decomposition_dimension
        if explained_variance is not None and explained_variance_ratio is None:
            explained_variance_ratio = \
                explained_variance / explained_variance.sum()
                
        # Store the results in mva_results
        target.factors = factors
        target.scores = scores
        target.explained_variance = explained_variance
        target.explained_variance_ratio = explained_variance_ratio
        target.decomposition_algorithm = algorithm
        target.poissonian_noise_normalized = \
            normalize_poissonian_noise
        target.output_dimension = output_dimension
        target.unfolded = self._unfolded4decomposition
        target.centre = centre
        target.mean = mean
        

        if output_dimension and factors.shape[1] != output_dimension:
            target.crop_decomposition_dimension(output_dimension)
        
        # Delete the unmixing information, because it would refer to a
        # previous decomposition
        target.unmixing_matrix = None
        target.ica_algorithm = None

        if self._unfolded4decomposition is True:
            target.original_shape = self._shape_before_unfolding

        # Reproject
        if mean is None:
            mean = 0
        if reproject in ('navigation', 'both'):
            if algorithm not in ('nmf', 'sparse_pca', 'mini_batch_sparse_pca'):
                scores_ = np.dot(dc[:, signal_mask] - mean, factors)
            else:
                scores_ = sk.transform(dc[:, signal_mask])
            target.scores = scores_
        if reproject in ('signal', 'both'):
            if algorithm not in ('nmf', 'sparse_pca', 'mini_batch_sparse_pca'):
                factors = np.dot(np.linalg.pinv(scores),
                                 dc[navigation_mask, :] - mean).T
                target.factors = factors
            else:
                messages.information("Reprojecting the signal is not yet "
                                     "supported for this algorithm")
                if reproject == 'both':
                    reproject = 'signal'
                else:
                    reproject = None
        
        # Rescale the results if the noise was normalized
        if normalize_poissonian_noise is True:
            target.factors[:] *= self._root_bH.T
            target.scores[:] *= self._root_aG
            
        # Set the pixels that were not processed to nan
        if not isinstance(signal_mask, slice):
            target.signal_mask = signal_mask
            if reproject not in ('both', 'signal'):
                factors = np.zeros((dc.shape[-1], target.factors.shape[1]))
                factors[signal_mask, :] = target.factors
                factors[~signal_mask, :] = np.nan
                target.factors = factors
        if not isinstance(navigation_mask, slice):
            target.navigation_mask = navigation_mask
            if reproject not in ('both', 'navigation'):
                scores = np.zeros((dc.shape[0], target.scores.shape[1]))
                scores[navigation_mask, :] = target.scores
                scores[~navigation_mask, :] = np.nan
                target.scores = scores

        # Undo any pre-treatments
        self.undo_treatments(on_peaks)
        
        if self._unfolded4decomposition is True:
            self.fold()
            self._unfolded4decomposition = False
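
A hypothetical call, assuming s is a spectrum image already loaded into HyperSpy (a sketch, not standalone code):

# SVD with Poissonian-noise normalization, keeping 10 components
s.decomposition(normalize_poissonian_noise=True,
                algorithm='svd',
                output_dimension=10)

# Reproject both factors and scores onto the unmasked data
s.decomposition(algorithm='svd', reproject='both')
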
Example #27
    def principal_components_analysis(self,
                                      normalize_poissonian_noise=False,
                                      algorithm='svd',
                                      output_dimension=None,
                                      navigation_mask=None,
                                      signal_mask=None,
                                      center=False,
                                      variance2one=False,
                                      var_array=None,
                                      var_func=None,
                                      polyfit=None,
                                      on_peaks=False):
        """Principal components analysis.

        The results are stored in self.mva_results

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise
        algorithm : {'svd', 'fast_svd', 'mlpca', 'fast_mlpca', 'mdp', 'NIPALS'}
        output_dimension : None or int
            number of PCA to keep
        navigation_mask : boolean numpy array
        signal_mask : boolean numpy array
        center : bool
            Perform energy centering before PCA
        variance2one : bool
            Perform whitening before PCA
        var_array : numpy array
            Array of variance for the maximum likelihood PCA algorithm
        var_func : function or numpy array
            If a function, it will be applied to the dataset to obtain the
            var_array. Alternatively, it can be an array with the
            coefficients of a polynomial.
        polyfit :


        See also
        --------
        plot_principal_components, plot_principal_components_maps, plot_lev

        """
        # backup the original data
        if on_peaks:
            self._data_before_treatments = self.peak_chars.copy()
        else:
            self._data_before_treatments = self.data.copy()
        # Check for conflicting options and correct them when possible
        if (algorithm == 'mdp' or algorithm == 'NIPALS') and center is False:
            print ("The PCA algorithms from the MDP toolkit (mdp and "
                   "NIPALS) do not permit deactivating data centering. "
                   "Therefore, the algorithm will proceed to center the "
                   "data.")
            center = True
        if algorithm == 'mlpca':
            if normalize_poissonian_noise is True:
                messages.warning(
                    "It makes no sense to do normalize_poissonian_noise with "
                    "the MLPCA algorithm. Therefore, "
                    "normalize_poissonian_noise is set to False")
                normalize_poissonian_noise = False
            if output_dimension is None:
                messages.warning_exit(
                    "With the mlpca algorithm the output_dimension must "
                    "be specified")

        if center is True and normalize_poissonian_noise is True:
            messages.warning(
                "Centering is not compatible with Poissonian noise "
                "normalization\n"
                "Disabling centering")
            center = False

        if variance2one is True and normalize_poissonian_noise is True:
            messages.warning(
                "Variance normalization is not compatible with Poissonian "
                "noise normalization.\n"
                "Disabling variance2one")
            variance2one = False

        # Apply pre-treatments
        # Centering
        if center is True:
            self.energy_center()
        # Variance normalization
        if variance2one is True:
            self.variance2one()
        # Transform the data in a line spectrum
        self._unfolded4pca = self.unfold_if_multidim()
        # Normalize the poissonian noise
        # Note that this function can change the masks
        if normalize_poissonian_noise is True:
            navigation_mask, signal_mask = \
                self.normalize_poissonian_noise(navigation_mask = navigation_mask,
                                                signal_mask = signal_mask,
                                                return_masks = True)

        navigation_mask = self._correct_navigation_mask_when_unfolded(
            navigation_mask)

        messages.information('Performing principal components analysis')

        if on_peaks:
            dc = self.peak_chars
        else:
            # The data must be transposed both for Images and Spectra
            dc = self.data.T.squeeze()
        #set the output target (peak results or not?)
        target = self._get_target(on_peaks)
        # Transform the None masks in slices to get the right behaviour
        if navigation_mask is None:
            navigation_mask = slice(None)
        if signal_mask is None:
            signal_mask = slice(None)
        if algorithm == 'mdp' or algorithm == 'NIPALS':
            if algorithm == 'mdp':
                target.pca_node = mdp.nodes.PCANode(
                    output_dim=output_dimension, svd=True)
            elif algorithm == 'NIPALS':
                target.pca_node = mdp.nodes.NIPALSNode(
                    output_dim=output_dimension)
            # Train the node
            print "\nPerforming the PCA node training"
            print "This include variance normalizing"
            target.pca_node.train(dc[signal_mask, :][:, navigation_mask])
            print "Performing PCA projection"
            pc = target.pca_node.execute(dc[:, navigation_mask])
            pca_v = target.pca_node.v
            pca_V = target.pca_node.d
            target.output_dimension = output_dimension

        elif algorithm == 'svd':
            pca_v, pca_V = pca(dc[signal_mask, :][:, navigation_mask])
            pc = np.dot(dc[:, navigation_mask], pca_v)
        elif algorithm == 'fast_svd':
            pca_v, pca_V = pca(dc[signal_mask, :][:, navigation_mask],
                               fast=True,
                               output_dimension=output_dimension)
            pc = np.dot(dc[:, navigation_mask], pca_v)

        elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
            print "Performing the MLPCA training"
            if output_dimension is None:
                messages.warning_exit(
                    "For MLPCA it is mandatory to define the output_dimension")
            if var_array is None and var_func is None:
                messages.information('No variance array provided. '
                                     'Assuming Poissonian data')
                var_array = dc.squeeze()[signal_mask, :][:, navigation_mask]

            if var_array is not None and var_func is not None:
                messages.warning_exit(
                    "You have defined both the var_func and var_array "
                    "keywords. Please define just one of them")
            if var_func is not None:
                if hasattr(var_func, '__call__'):
                    var_array = var_func(dc[signal_mask, ...][:,
                                                              navigation_mask])
                else:
                    try:
                        var_array = np.polyval(
                            var_func, dc[signal_mask, :][:, navigation_mask])
                    except:
                        messages.warning_exit(
                            'var_func must be either a function or an array '
                            'defining the coefficients of a polynomial')
            if algorithm == 'mlpca':
                fast = False
            else:
                fast = True
            target.mlpca_output = mlpca(
                dc.squeeze()[signal_mask, :][:, navigation_mask],
                var_array.squeeze(),
                output_dimension,
                fast=fast)
            U, S, V, Sobj, ErrFlag = target.mlpca_output
            print "Performing PCA projection"
            pc = np.dot(dc[:, navigation_mask], V)
            pca_v = V
            pca_V = S**2

        if output_dimension:
            print "trimming to %i dimensions" % output_dimension
            pca_v = pca_v[:, :output_dimension]
            pca_V = pca_V[:output_dimension]
            pc = pc[:, :output_dimension]

        target.pc = pc
        target.v = pca_v
        target.V = pca_V
        target.pca_algorithm = algorithm
        target.centered = center
        target.poissonian_noise_normalized = \
            normalize_poissonian_noise
        target.output_dimension = output_dimension
        target.unfolded = self._unfolded4pca
        target.variance2one = variance2one

        if self._unfolded4pca is True:
            target.original_shape = self._shape_before_unfolding

        # Rescale the results if the noise was normalized
        if normalize_poissonian_noise is True:
            target.pc[signal_mask, :] *= self._root_bH
            target.v *= self._root_aG.T
            if isinstance(navigation_mask, slice):
                navigation_mask = None
            if isinstance(signal_mask, slice):
                signal_mask = None

        #undo any pre-treatments
        self.undo_treatments(on_peaks)

        # Set the pixels that were not processed to nan
        if navigation_mask is not None and not isinstance(
                navigation_mask, slice):
            v = np.zeros((dc.shape[1], target.v.shape[1]),
                         dtype=target.v.dtype)
            v[navigation_mask == False, :] = np.nan
            v[navigation_mask, :] = target.v
            target.v = v

        if self._unfolded4pca is True:
            self.fold()
            self._unfolded4pca = False
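
A hypothetical usage sketch for the method above (not part of the original source): `s` stands for a spectrum-image object exposing this method, and the channel count and component numbers are illustrative assumptions. Note that in this method a True mask value means the pixel or channel is included.

import numpy as np

# Plain SVD-based PCA on the full dataset; results land in s.mva_results
s.principal_components_analysis()

# MLPCA requires an explicit number of components; Poissonian
# statistics are assumed when neither var_array nor var_func is given
s.principal_components_analysis(algorithm='mlpca', output_dimension=5)

# Boolean masks select what takes part in the decomposition
# (True = included here); 1024 channels is an illustrative size
signal_mask = np.ones(1024, dtype=bool)
signal_mask[:20] = False          # exclude the first 20 channels
s.principal_components_analysis(normalize_poissonian_noise=True,
                                signal_mask=signal_mask)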
Example #29
0
    def plot_maps(self,
                  components,
                  mva_type=None,
                  scores=None,
                  factors=None,
                  cmap=plt.cm.gray,
                  no_nans=False,
                  with_components=True,
                  plot=True,
                  on_peaks=False,
                  directory=None):
        """
        Plot component maps for the different MSA types

        Parameters
        ----------
        components : None, int, or list of ints
            if None, returns maps of all components.
            if int, returns maps of components with ids from 0 to given int.
            if list of ints, returns maps of components with ids in given list.
        mva_type : string
            Currently either 'pca' or 'ica'.
        scores : numpy array
            The array of score maps.
        factors : numpy array
            The array of components, with each column as a component.
        cmap : matplotlib colormap instance
        no_nans : bool
            If True, substitute NaN values in the score maps with zeros
            before plotting.
        with_components : bool
            If True, plot each component next to its score map.
        plot : bool
            If True, draw the figures; the list of images is returned
            either way.
        directory : string or None
            If not None, save each figure as a png in this directory.
        """
        from hyperspy.signals.image import Image
        from hyperspy.signals.spectrum import Spectrum

        target = self._get_target(on_peaks)

        if scores is None or (factors is None and with_components is True):
            print "Either recmatrix or components were not provided."
            print "Loading existing values from object."
            if mva_type is None:
                print "Neither scores nor analysis type specified.  Cannot proceed."
                return

            elif mva_type.lower() == 'pca':
                scores = target.v.T
                factors = target.pc
            elif mva_type.lower() == 'ica':
                scores = self._get_ica_scores(target)
                factors = target.ic
                if no_nans:
                    print 'Removing NaNs for a visually prettier plot.'
                    scores = np.nan_to_num(scores)  # remove ugly NaN pixels
            else:
                print "No scores provided and analysis type '%s' unrecognized. Cannot proceed." % mva_type
                return


#        if len(self.axes_manager.axes)==2:
#            shape=self.data.shape[0],1
#        else:
#            shape=self.data.shape[0],self.data.shape[1]
        im_list = []

        if components is None:
            components = xrange(factors.shape[1])

        elif not isinstance(components, list):
            components = xrange(components)

        for i in components:
            if plot is True:
                figure = plt.figure()
                if with_components:
                    ax = figure.add_subplot(121)
                    ax2 = figure.add_subplot(122)
                else:
                    ax = figure.add_subplot(111)
            if self.axes_manager.navigation_dimension == 2:
                toplot = scores[i, :].reshape(
                    self.axes_manager.navigation_shape)
                im_list.append(
                    Image({
                        'data':
                        toplot,
                        'axes':
                        self.axes_manager._get_non_slicing_axes_dicts()
                    }))
                if plot is True:
                    mapa = ax.matshow(toplot, cmap=cmap)
                    if with_components:
                        ax2.plot(self.axes_manager.axes[-1].axis, factors[:,
                                                                          i])
                        ax2.set_title('%s component %i' %
                                      (mva_type.upper(), i))
                        ax2.set_xlabel('Energy (eV)')
                    figure.colorbar(mapa)
                    figure.canvas.draw()
                    #pointer = widgets.DraggableSquare(self.coordinates)
                    #pointer.add_axes(ax)
            elif self.axes_manager.navigation_dimension == 1:
                toplot = scores[i, :]
                im_list.append(
                    Spectrum({
                        "data":
                        toplot,
                        'axes':
                        self.axes_manager._get_non_slicing_axes_dicts()
                    }))
                im_list[-1].get_dimensions_from_data()
                if plot is True:
                    ax.step(range(len(toplot)), toplot)

                    if with_components:
                        ax2.plot(self.axes_manager.axes[-1].axis, factors[:,
                                                                          i])
                        ax2.set_title('%s component %s' %
                                      (mva_type.upper(), i))
                        ax2.set_xlabel('Energy (eV)')
            else:
                messages.warning_exit('View not supported')
            if plot is True:
                ax.set_title('%s component number %s map' %
                             (mva_type.upper(), i))
                figure.canvas.draw()
                if directory is not None:
                    if not os.path.isdir(directory):
                        os.makedirs(directory)
                    figure.savefig(os.path.join(directory, 'IC-%i.png' % i),
                                   dpi=600)
        return im_list
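
A brief, hypothetical sketch of calling plot_maps after an analysis; the component indices and the output directory are illustrative assumptions.

# Score maps of the first three principal components, each plotted
# next to the corresponding factor
im_list = s.plot_maps(components=3, mva_type='pca')

# Specific ICA components, with NaN pixels zeroed and figures saved
im_list = s.plot_maps(components=[0, 2, 5], mva_type='ica',
                      no_nans=True, directory='component_maps')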
Example #31
0
    def normalize_poissonian_noise(self,
                                   navigation_mask=None,
                                   signal_mask=None,
                                   return_masks=False):
        """
        Scales the SI following Surf. Interface Anal. 2004; 36: 203–212 to
        "normalize" the poissonian data for PCA analysis

        Parameters
        ----------
        navigation_mask : boolean numpy array
        signal_mask : boolean numpy array
        """
        messages.information(
            "Scaling the data to normalize the (presumably) Poissonian noise")
        # If energy axis is not first, it needs to be for MVA.
        refold = self.unfold_if_multidim()
        dc = self.data.T.squeeze().copy()
        navigation_mask = \
            self._correct_navigation_mask_when_unfolded(navigation_mask)
        if navigation_mask is None:
            navigation_mask = slice(None)
        if signal_mask is None:
            signal_mask = slice(None)
        # Rescale the data to gaussianize the poissonian noise
        aG = dc[signal_mask, :][:, navigation_mask].sum(0).squeeze()
        bH = dc[signal_mask, :][:, navigation_mask].sum(1).squeeze()
        # Checks if any is negative
        if (aG < 0).any() or (bH < 0).any():
            messages.warning_exit(
                "Data error: negative values\n"
                "Are you sure that the data follow a poissonian distribution?")
        # Update the spatial and energy masks so they do not include rows
        # or columns that sum to zero.
        aG0 = (aG == 0)
        bH0 = (bH == 0)
        if aG0.any():
            if isinstance(navigation_mask, slice):
                # Convert the slice into a mask before setting its values
                navigation_mask = np.ones((self.data.shape[1]), dtype='bool')
            # Set columns summing to zero as masked
            navigation_mask[aG0] = False
            aG = aG[~aG0]
        if bH0.any():
            if isinstance(signal_mask, slice):
                # Convert the slice into a mask before setting its values
                signal_mask = np.ones((self.data.shape[0]), dtype='bool')
            # Set rows summing to zero as masked
            signal_mask[bH0] = False
            bH = bH[~bH0]
        self._root_aG = np.sqrt(aG)[np.newaxis, :]
        self._root_bH = np.sqrt(bH)[:, np.newaxis]
        temp = (dc[signal_mask, :][:, navigation_mask] /
                (self._root_aG * self._root_bH))
        if isinstance(signal_mask, slice) or isinstance(
                navigation_mask, slice):
            dc[signal_mask, navigation_mask] = temp
        else:
            mask3D = signal_mask[:, np.newaxis] * \
                navigation_mask[np.newaxis, :]
            dc[mask3D] = temp.ravel()
        # TODO - dc was never modifying self.data - was normalization ever
        # really getting applied?  Comment next lines as necessary.
        self.data = dc.T.copy()
        # end normalization write to self.data.
        if refold is True:
            print "Automatically refolding the SI after scaling"
            self.fold()
        if return_masks is True:
            if isinstance(navigation_mask, slice):
                navigation_mask = None
            if isinstance(signal_mask, slice):
                signal_mask = None
            return navigation_mask, signal_mask
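
A minimal numpy sketch of the scaling rule implemented above, run on a synthetic Poissonian dataset; the shape and rate parameter are arbitrary assumptions.

import numpy as np

# Synthetic counts: 100 signal channels x 50 navigation pixels
rng = np.random.RandomState(0)
dc = rng.poisson(lam=20.0, size=(100, 50)).astype('float64')

aG = dc.sum(0)    # per-pixel totals (column sums)
bH = dc.sum(1)    # per-channel totals (row sums)
root_aG = np.sqrt(aG)[np.newaxis, :]
root_bH = np.sqrt(bH)[:, np.newaxis]

# Dividing each element by sqrt(aG_j * bH_i) approximately equalizes
# the Poissonian noise variance across the dataset before PCA
dc_normalized = dc / (root_aG * root_bH)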
Example #33
0
    def decomposition(self,
                      normalize_poissonian_noise=False,
                      algorithm='svd',
                      output_dimension=None,
                      centre=None,
                      auto_transpose=True,
                      navigation_mask=None,
                      signal_mask=None,
                      var_array=None,
                      var_func=None,
                      polyfit=None,
                      reproject=None,
                      **kwargs):
        """Decomposition with a choice of algorithms

        The results are stored in self.learning_results

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise
            
        algorithm : 'svd' | 'fast_svd' | 'sklearn_pca' | 'mlpca' |
            'fast_mlpca' | 'nmf' | 'sparse_pca' | 'mini_batch_sparse_pca'
        
        output_dimension : None or int
            number of components to keep/calculate
            
        centre : None | 'variables' | 'trials'
            If None, no centring is applied. If 'variables', the centring
            is performed in the variable axis. If 'trials', the centring
            is performed in the 'trials' axis. It only has effect when
            using the svd or fast_svd algorithms.
        
        auto_transpose : bool
            If True, automatically transposes the data to boost performance.
            Only has effect when using the svd or fast_svd algorithms.
            
        navigation_mask : boolean numpy array
            The navigation locations marked as True are not used in the
            decomposition.

        signal_mask : boolean numpy array
            The signal locations marked as True are not used in the
            decomposition.
            
        var_array : numpy array
            Array of variance for the maximum likelihood PCA algorithm
            
        var_func : function or numpy array
            If a function, it is applied to the dataset to obtain the
            var_array. Alternatively, it can be an array with the
            coefficients of a polynomial.
            
        polyfit :
        
        reproject : None | 'signal' | 'navigation' | 'both'
            If not None, the results of the decomposition are reprojected
            onto the full dataset, including the masked areas.


        See also
        --------
        plot_decomposition_factors, plot_decomposition_loadings, plot_lev

        """
        # Check if it is the wrong data type
        if self.data.dtype.char not in ['e', 'f', 'd']: # If not float
            messages.warning(
                'To perform a decomposition the data must be of the float type.'
                ' You can change the type using the change_dtype method'
                ' e.g. s.change_dtype(\'float64\')\n'
                'Nothing done.')
            return
        # backup the original data
        self._data_before_treatments = self.data.copy()

        if algorithm == 'mlpca':
            if normalize_poissonian_noise is True:
                messages.warning(
                    "It makes no sense to do normalize_poissonian_noise with "
                    "the MLPCA algorithm. Therefore, "
                    "normalize_poissonian_noise is set to False")
                normalize_poissonian_noise = False
            if output_dimension is None:
                messages.warning_exit("With the mlpca algorithm the "
                                      "output_dimension must be specified")


        # Apply pre-treatments
        # Transform the data in a line spectrum
        self._unfolded4decomposition = self.unfold_if_multidim()
        try:
            if hasattr(navigation_mask, 'ravel'):
                navigation_mask = navigation_mask.ravel()

            if hasattr(signal_mask, 'ravel'):
                signal_mask = signal_mask.ravel()

            # Normalize the poissonian noise
            # TODO this function can change the masks and this can cause
            # problems when reprojecting
            if normalize_poissonian_noise is True:
                self.normalize_poissonian_noise(
                                        navigation_mask=navigation_mask,
                                        signal_mask=signal_mask,)
            messages.information('Performing decomposition analysis')

            dc = self.data
            #set the output target (peak results or not?)
            target = self.learning_results
            
            # Transform the None masks in slices to get the right behaviour
            if navigation_mask is None:
                navigation_mask = slice(None)
            else:
                navigation_mask = ~navigation_mask
            if signal_mask is None:
                signal_mask = slice(None)
            else:
                signal_mask = ~signal_mask
                
            # WARNING: signal_mask and navigation_mask values are now their
            # negations, i.e. True -> False and vice versa. However, the
            # stored value (at the end of the method) coincides with the
            # input masks
            
            # Reset the explained_variance which is not set by all the 
            # algorithms
            explained_variance = None
            explained_variance_ratio = None
            mean = None
            
            if algorithm == 'svd':
                factors, loadings, explained_variance, mean = svd_pca(
                    dc[:,signal_mask][navigation_mask,:], centre = centre,
                    auto_transpose = auto_transpose)

            elif algorithm == 'fast_svd':
                factors, loadings, explained_variance, mean = svd_pca(
                    dc[:,signal_mask][navigation_mask,:],
                    fast=True,
                    output_dimension=output_dimension,
                    centre=centre,
                    auto_transpose=auto_transpose)

            elif algorithm == 'sklearn_pca':
                if sklearn_installed is False:
                    raise ImportError(
                    'sklearn is not installed. Nothing done')
                sk = sklearn.decomposition.PCA(**kwargs)
                sk.n_components = output_dimension
                loadings = sk.fit_transform((
                    dc[:,signal_mask][navigation_mask,:]))
                factors = sk.components_.T
                explained_variance = sk.explained_variance_
                mean = sk.mean_
                centre = 'trials'   

            elif algorithm == 'nmf':
                if sklearn_installed is False:
                    raise ImportError(
                    'sklearn is not installed. Nothing done')
                sk = sklearn.decomposition.NMF(**kwargs)
                sk.n_components = output_dimension
                loadings = sk.fit_transform((
                    dc[:,signal_mask][navigation_mask,:]))
                factors = sk.components_.T
                
            elif algorithm == 'sparse_pca':
                if sklearn_installed is False:
                    raise ImportError(
                    'sklearn is not installed. Nothing done')
                sk = sklearn.decomposition.SparsePCA(
                    output_dimension, **kwargs)
                loadings = sk.fit_transform(
                    dc[:,signal_mask][navigation_mask,:])
                factors = sk.components_.T
                
            elif algorithm == 'mini_batch_sparse_pca':
                if sklearn_installed is False:
                    raise ImportError(
                    'sklearn is not installed. Nothing done')
                sk = sklearn.decomposition.MiniBatchSparsePCA(
                    output_dimension, **kwargs)
                loadings = sk.fit_transform(
                    dc[:,signal_mask][navigation_mask,:])
                factors = sk.components_.T

            elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
                print "Performing the MLPCA training"
                if output_dimension is None:
                    messages.warning_exit(
                    "For MLPCA it is mandatory to define the "
                    "output_dimension")
                if var_array is None and var_func is None:
                    messages.information('No variance array provided. '
                                         'Assuming Poissonian data')
                    var_array = dc[:,signal_mask][navigation_mask,:]

                if var_array is not None and var_func is not None:
                    messages.warning_exit(
                        "You have defined both the var_func and var_array "
                        "keywords. Please define just one of them")
                if var_func is not None:
                    if hasattr(var_func, '__call__'):
                        var_array = var_func(
                            dc[:, signal_mask][navigation_mask, :])
                    else:
                        try:
                            var_array = np.polyval(
                                var_func,
                                dc[:, signal_mask][navigation_mask, :])
                        except:
                            messages.warning_exit(
                                'var_func must be either a function or an '
                                'array defining the coefficients of a '
                                'polynomial')
                if algorithm == 'mlpca':
                    fast = False
                else:
                    fast = True
                U,S,V,Sobj, ErrFlag = mlpca(
                    dc[:,signal_mask][navigation_mask,:],
                    var_array, output_dimension, fast = fast)
                loadings = U * S
                factors = V
                explained_variance_ratio = S ** 2 / Sobj
                explained_variance = S ** 2 / len(factors)
            else:
                raise ValueError('Algorithm not recognised. '
                                     'Nothing done')

            # We must calculate the ratio here because otherwise the sum
            # information can be lost if the user calls
            # crop_decomposition_dimension
            if explained_variance is not None and \
            explained_variance_ratio is None:
                explained_variance_ratio = \
                    explained_variance / explained_variance.sum()
                    
            # Store the results in learning_results
            target.factors = factors
            target.loadings = loadings
            target.explained_variance = explained_variance
            target.explained_variance_ratio = explained_variance_ratio
            target.decomposition_algorithm = algorithm
            target.poissonian_noise_normalized = \
                normalize_poissonian_noise
            target.output_dimension = output_dimension
            target.unfolded = self._unfolded4decomposition
            target.centre = centre
            target.mean = mean
            

            if output_dimension and factors.shape[1] != output_dimension:
                target.crop_decomposition_dimension(output_dimension)
            
            # Delete the unmixing information, because it would refer to a
            # previous decomposition
            target.unmixing_matrix = None
            target.bss_algorithm = None

            if self._unfolded4decomposition is True:
                folding = \
                    self.mapped_parameters._internal_parameters.folding
                target.original_shape = folding.original_shape

            # Reproject
            if mean is None:
                mean = 0
            if reproject in ('navigation', 'both'):
                if algorithm not in ('nmf', 'sparse_pca', 
                                      'mini_batch_sparse_pca'):
                    loadings_ = np.dot(dc[:,signal_mask] - mean, factors)
                else:
                    loadings_ = sk.transform(dc[:,signal_mask])
                target.loadings = loadings_
            if reproject in ('signal', 'both'):
                if algorithm not in ('nmf', 'sparse_pca',
                                      'mini_batch_sparse_pca'):
                    factors = np.dot(np.linalg.pinv(loadings), 
                                     dc[navigation_mask,:] - mean).T
                    target.factors = factors
                else:
                    messages.information("Reprojecting the signal is not yet "
                                         "supported for this algorithm")
                    if reproject == 'both':
                        # The navigation reprojection above succeeded,
                        # so keep it when downgrading from 'both'
                        reproject = 'navigation'
                    else:
                        reproject = None
            
            # Rescale the results if the noise was normalized
            if normalize_poissonian_noise is True:
                target.factors[:] *= self._root_bH.T
                target.loadings[:] *= self._root_aG
                
            # Set the pixels that were not processed to nan
            if not isinstance(signal_mask, slice):
                # Store the (inverted, as input) signal mask
                target.signal_mask = ~signal_mask.reshape(
                    self.axes_manager._signal_shape_in_array)
                if reproject not in ('both', 'signal'):
                    factors = np.zeros((dc.shape[-1], target.factors.shape[1]))
                    factors[signal_mask == True,:] = target.factors
                    factors[signal_mask == False,:] = np.nan
                    target.factors = factors
            if not isinstance(navigation_mask, slice):
                # Store the (inverted, as input) navigation mask
                target.navigation_mask = ~navigation_mask.reshape(
                    self.axes_manager._navigation_shape_in_array)
                if reproject not in ('both', 'navigation'):
                    loadings = np.zeros((dc.shape[0], target.loadings.shape[1]))
                    loadings[navigation_mask == True,:] = target.loadings
                    loadings[navigation_mask == False,:] = np.nan
                    target.loadings = loadings
        finally:
            #undo any pre-treatments
            self.undo_treatments()
            
            if self._unfolded4decomposition is True:
                self.fold()
                self._unfolded4decomposition = False
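
A hypothetical call sequence for decomposition (not from the original source): `s` is assumed to be a signal object exposing this method, and the channel count, mask, and component numbers are illustrative. Unlike the older principal_components_analysis, a True mask value here marks data to exclude.

import numpy as np

s.change_dtype('float64')         # decomposition requires float data

# Poisson-normalized SVD, excluding the first 10 channels;
# 1024 channels is an illustrative size
bad_channels = np.zeros(1024, dtype=bool)
bad_channels[:10] = True
s.decomposition(normalize_poissonian_noise=True,
                signal_mask=bad_channels,
                output_dimension=8)

# MLPCA with an explicit variance model: var_func may be a callable
# applied to the data, or polynomial coefficients for np.polyval
s.decomposition(algorithm='mlpca', output_dimension=8,
                var_func=lambda d: 1.0 * d)  # Poisson-like: variance ~ signal

# Results (factors, loadings, explained variance) are stored here
results = s.learning_results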
Example #34
0
import os.path
import shutil
from hyperspy import messages

config_files = ['hyperspyrc', 'edges_db.csv']
data_path = os.sep.join([os.path.dirname(__file__), '..', 'data'])

if os.name == 'posix':
    config_path = os.path.join(os.path.expanduser('~'), '.hyperspy')
    os_name = 'posix'
elif os.name in ['nt', 'dos']:
    ##    appdata = os.environ['APPDATA']
    config_path = os.path.expanduser('~/.hyperspy')
    ##    if os.path.isdir(appdata) is False:
    ##        os.mkdir(appdata)
    ##    config_path = os.path.join(os.environ['APPDATA'], 'hyperspy')
    os_name = 'windows'
else:
    messages.warning_exit('Unsupported operating system: %s' % os.name)

if os.path.isdir(config_path) is False:
    messages.information("Creating config directory: %s" % config_path)
    os.mkdir(config_path)

for file in config_files:
    templates_file = os.path.join(data_path, file)
    config_file = os.path.join(config_path, file)
    if os.path.isfile(config_file) is False:
        messages.information("Setting configuration file: %s" % file)
        shutil.copy(templates_file, config_file)
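
An illustrative check of where this setup script places the configuration; the exact path depends on the user's home directory, and the listed files are the expected defaults.

import os

config_path = os.path.join(os.path.expanduser('~'), '.hyperspy')
print os.path.isdir(config_path)       # True once the block above has run
print sorted(os.listdir(config_path))  # e.g. ['edges_db.csv', 'hyperspyrc']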