Esempio n. 1
0
    def value2index(self, value):
        """Convert an axis value into the index of the nearest axis point.

        The index is computed from the ``offset`` and ``scale`` attributes.
        Values that fall outside the axis are clamped to the first or last
        index and a warning is issued.

        Parameters
        ----------
        value : float or None

        Returns
        -------
        int or None
            None is returned only when `value` is None.
        """
        if value is None:
            return None

        position = int(round((value - self.offset) / self.scale))
        if position < 0:
            messages.warning("The given value is below the axis limits")
            return 0
        if self.size > position:
            return position
        messages.warning("The given value is above the axis limits")
        return int(self.size - 1)
Esempio n. 2
0
    def value2index(self, value):
        """Return the index of the axis point closest to `value`.

        The index is derived from the ``offset`` and ``scale`` attributes.
        When the result lies outside ``[0, size)`` a warning is emitted and
        the nearest valid index (0 or ``size - 1``) is returned instead.

        Parameters
        ----------
        value : float or None

        Returns
        -------
        int or None
            None when `value` is None, otherwise the (possibly clamped) index.
        """
        if value is None:
            return None

        nearest = int(round((value - self.offset) / self.scale))
        below = nearest < 0
        if not below and self.size > nearest:
            return nearest
        if below:
            messages.warning("The given value is below the axis limits")
            return 0
        messages.warning("The given value is above the axis limits")
        return int(self.size - 1)
Esempio n. 3
0
 def function(self, x):
     """Evaluate the component at `x`.

     When ``self.interpolate`` is False the stored array multiplied by the
     intensity value is returned. Otherwise, if the interpolator is flagged
     as ready, it is evaluated at `x` shifted by the origin value and
     multiplied by the intensity value. If it is not ready a warning is
     issued and None is returned.
     """
     if self.interpolate is False:
         return self.array * self.intensity.value
     if self._interpolation_ready is not True:
         messages.warning(
             'To use interpolation you must call prepare_interpolator first')
         return None
     interpolated = self.interp(x - self.origin.value)
     return interpolated * self.intensity.value
Esempio n. 4
0
 def function(self, x):
     """Return the component evaluated at `x`.

     Three cases are handled:

     * ``self.interpolate`` is False: the stored array times the intensity
       value is returned (`x` is not used).
     * the interpolator is flagged as ready: it is evaluated at
       ``x - origin`` and the result is multiplied by the intensity value.
     * otherwise: a warning is issued and None is returned.
     """
     if self.interpolate is False:
         return self.array * self.intensity.value

     if self._interpolation_ready is True:
         shifted_value = self.interp(x - self.origin.value)
         return shifted_value * self.intensity.value

     messages.warning(
         'To use interpolation you must call prepare_interpolator first'
     )
     return None
Esempio n. 5
0
 def two_area_background_estimation(self, E1=None, E2=None, powerlaw=None):
     """Estimates the parameters of a power law background with the two
     area method.

     On success the stored values are fetched into the model through
     ``self.fetch_stored_values()``; on failure a warning is issued.

     Parameters
     ----------
     E1 : float or None
         Start of the estimation window. If None or lower than the first
         value of the axis selected by ``self.channel_switches``, that
         first value is used.
     E2 : float or None
         End of the estimation window. If None, it is set to the onset
         energy of the first active edge that is not below E1 minus
         ``preferences.EELS.preedge_safe_window_width``. If there is no
         such edge, the last value of the selected axis is used.
     powerlaw : PowerLaw component or None
         If None, it will try to guess the right component from the
         background components of the model

     """
     ea = self.axis.axis[self.channel_switches]
     if E1 is None or E1 < ea[0]:
         E1 = ea[0]
     if E2 is None:
         # Look for the first active edge whose onset is not below E1.
         # A bounded search is used so that a model whose edges are all
         # inactive or below E1 falls back to the end of the axis instead
         # of indexing past the end of ``self.edges``.
         first_edge = next(
             (edge for edge in self.edges or ()
              if not (edge.onset_energy.value < E1 or
                      edge.active is False)),
             None)
         if first_edge is None:
             E2 = ea[-1]
         else:
             E2 = first_edge.onset_energy.value - \
                 preferences.EELS.preedge_safe_window_width
     if powerlaw is None:
         for component in self._background_components:
             if isinstance(component, components.PowerLaw):
                 if powerlaw is None:
                     powerlaw = component
                 else:
                     messages.warning(
                         'There are more than two power law '
                         'background components defined in this model, '
                         'please use the powerlaw keyword to specify one'
                         ' of them')
                     return
         if powerlaw is None:
             # Nothing to estimate: avoid calling a method on None below.
             messages.warning(
                 'No power law background component was found in this '
                 'model')
             return

     if powerlaw.estimate_parameters(
             self.spectrum, E1, E2, False) is True:
         self.fetch_stored_values()
     else:
         messages.warning(
             "The power law background parameters could not "
             "be estimated.\n"
             "Try choosing a different energy range for the estimation")
Esempio n. 6
0
 def two_area_background_estimation(self, E1=None, E2=None, powerlaw=None):
     """Estimates the parameters of a power law background with the two
     area method.

     When the estimation succeeds ``self.fetch_stored_values()`` is
     called; otherwise a warning is issued.

     Parameters
     ----------
     E1 : float or None
         Lower limit of the estimation window. Replaced by the first value
         of the axis selected by ``self.channel_switches`` when it is None
         or lower than that value.
     E2 : float or None
         Upper limit of the estimation window. When None it is derived
         from the first active edge whose onset energy is not below E1
         (onset energy minus
         ``preferences.EELS.preedge_safe_window_width``), or from the last
         value of the selected axis when no such edge exists.
     powerlaw : PowerLaw component or None
         If None, it will try to guess the right component from the
         background components of the model

     """
     ea = self.axis.axis[self.channel_switches]
     if E1 is None or E1 < ea[0]:
         E1 = ea[0]
     if E2 is None:
         # Bounded search for the first usable edge; the end of the axis is
         # the fallback when every edge is inactive or below E1, so the
         # edge list is never indexed out of range.
         usable_edge = None
         for edge in self.edges or ():
             if edge.onset_energy.value < E1 or edge.active is False:
                 continue
             usable_edge = edge
             break
         if usable_edge is None:
             E2 = ea[-1]
         else:
             E2 = usable_edge.onset_energy.value - \
                 preferences.EELS.preedge_safe_window_width
     if powerlaw is None:
         for component in self._background_components:
             if not isinstance(component, components.PowerLaw):
                 continue
             if powerlaw is not None:
                 messages.warning(
                     'There are more than two power law '
                     'background components defined in this model, '
                     'please use the powerlaw keyword to specify one'
                     ' of them')
                 return
             powerlaw = component
         if powerlaw is None:
             # Without a component there is nothing to estimate; warn
             # instead of failing on ``None.estimate_parameters``.
             messages.warning(
                 'No power law background component was found in this '
                 'model')
             return

     if powerlaw.estimate_parameters(
             self.spectrum, E1, E2, False) is True:
         self.fetch_stored_values()
     else:
         messages.warning(
             "The power law background parameters could not "
             "be estimated.\n"
             "Try choosing a different energy range for the estimation")
Esempio n. 7
0
    def _touch(self):
        """Run the model setup tasks.

        This method must be called every time that components are added to
        or removed from the model. It rebuilds ``self.edges`` and
        ``self._background_components``, configures every EELSCLEdge
        (microscope parameters, energy scale and fine structure list) and
        updates ``self._backgroundtype``. The first time that exactly one
        background component and at least one edge are present, the two
        area background estimation is run.
        """
        self._Model__touch()
        self.edges = []
        self._background_components = []

        for component in self:
            if not isinstance(component, EELSCLEdge):
                if isinstance(component, PowerLaw) or \
                        component.isbackground is True:
                    self._background_components.append(component)
                continue

            tem = self.spectrum.mapped_parameters.TEM
            component.set_microscope_parameters(
                E0=tem.beam_energy,
                alpha=tem.convergence_angle,
                beta=tem.EELS.collection_angle,
                energy_scale=self.axis.scale)
            component.energy_scale = self.axis.scale
            component.setfslist()

            # Edges located before the first selected channel are treated
            # as part of the background.
            position = component.edge_position()
            if position < self.axis.axis[self.channel_switches][0]:
                component.isbackground = True

            if component.isbackground is True:
                component.fs_state = False
                component.fslist.free = False
                component.backgroundtype = "edge"
                self._background_components.append(component)
            else:
                self.edges.append(component)

        if self.edges:
            self.edges.sort(key=EELSCLEdge.edge_position)
            self.resolve_fine_structure()
        else:
            messages.warning("The model contains no edges")

        backgrounds = self._background_components
        if not backgrounds:
            messages.warning("No background model has been defined")
        elif len(backgrounds) > 1:
            self._backgroundtype = "mix"
        else:
            self._backgroundtype = backgrounds[0].__repr__()
            if self._firstimetouch and self.edges:
                self.two_area_background_estimation()
                self._firstimetouch = False
Esempio n. 8
0
    def _touch(self):
        """Run the model setup tasks.

        To be called whenever components are added to or removed from the
        model. The bookmarks ``self.edges`` and
        ``self._background_components`` are rebuilt from scratch, each
        EELSCLEdge gets its microscope parameters, energy scale and fine
        structure list set, and ``self._backgroundtype`` is updated. With
        exactly one background component and at least one edge, the two
        area background estimation is run on the first call only.
        """
        self._Model__touch()
        self.edges = []
        self._background_components = []

        for component in self:
            is_edge = isinstance(component, EELSCLEdge)
            if not is_edge:
                if isinstance(component, PowerLaw) or \
                        component.isbackground is True:
                    self._background_components.append(component)
                continue

            microscope = self.spectrum.mapped_parameters.TEM
            component.set_microscope_parameters(
                E0=microscope.beam_energy,
                alpha=microscope.convergence_angle,
                beta=microscope.EELS.collection_angle,
                energy_scale=self.axis.scale)
            component.energy_scale = self.axis.scale
            component.setfslist()

            # An edge located before the first selected channel is handled
            # as a background component.
            onset = component.edge_position()
            if onset < self.axis.axis[self.channel_switches][0]:
                component.isbackground = True

            if component.isbackground is True:
                component.fs_state = False
                component.fslist.free = False
                component.backgroundtype = "edge"
                self._background_components.append(component)
            else:
                self.edges.append(component)

        if self.edges:
            self.edges.sort(key=EELSCLEdge.edge_position)
            self.resolve_fine_structure()
        else:
            messages.warning("The model contains no edges")

        n_backgrounds = len(self._background_components)
        if n_backgrounds > 1:
            self._backgroundtype = "mix"
        elif n_backgrounds == 0:
            messages.warning("No background model has been defined")
        else:
            self._backgroundtype = \
                self._background_components[0].__repr__()
            if self._firstimetouch and self.edges:
                self.two_area_background_estimation()
                self._firstimetouch = False
Esempio n. 9
0
    def two_area_background_estimation(self, E1=None, E2=None, powerlaw=None):
        """Estimates the parameters of a power law background with the two
        area method.

        Nothing is done when `powerlaw` is None and the active background
        components contain no power law, or more than one (in which case a
        warning is issued).

        Parameters
        ----------
        E1 : float or None
            Start of the estimation window; it is passed through
            ``self._get_start_energy``.
        E2 : float or None
            End of the estimation window. If None, it is set to the energy
            returned by ``self._get_first_ionization_edge_energy`` minus
            ``preferences.EELS.preedge_safe_window_width``, or to the last
            value of the axis selected by ``self.channel_switches`` when
            that method returns None.
        powerlaw : PowerLaw component or None
            If None, it will try to guess the right component from the
            background components of the model

        """
        if powerlaw is None:
            for component in self._active_background_components:
                if isinstance(component, components.PowerLaw):
                    if powerlaw is not None:
                        messages.warning(
                            'There are more than two power law '
                            'background components defined in this model, '
                            'please use the powerlaw keyword to specify one'
                            ' of them')
                        return
                    powerlaw = component
            # The absence of a power law can only be decided once every
            # component has been inspected; deciding inside the loop
            # aborted on the first component of any other type and left
            # ``powerlaw`` as None for an empty component list.
            if powerlaw is None:  # No power law component
                return

        E1 = self._get_start_energy(E1)
        if E2 is None:
            E2 = self._get_first_ionization_edge_energy(start_energy=E1)
            if E2 is None:
                E2 = self.axis.axis[self.channel_switches][-1]
            else:
                E2 = E2 - \
                    preferences.EELS.preedge_safe_window_width

        if not powerlaw.estimate_parameters(
                self.spectrum, E1, E2, only_current=False):
            messages.warning(
                "The power law background parameters could not "
                "be estimated.\n"
                "Try choosing a different energy range for the estimation")
Esempio n. 10
0
    def two_area_background_estimation(self, E1=None, E2=None, powerlaw=None):
        """Estimates the parameters of a power law background with the two
        area method.

        The method returns without estimating anything when `powerlaw` is
        None and the active background components contain either no power
        law or more than one (the latter also issues a warning).

        Parameters
        ----------
        E1 : float or None
            Start of the estimation window; it is passed through
            ``self._get_start_energy``.
        E2 : float or None
            End of the estimation window. If None, it is derived from
            ``self._get_first_ionization_edge_energy`` (minus
            ``preferences.EELS.preedge_safe_window_width``) or, when that
            returns None, from the last value of the axis selected by
            ``self.channel_switches``.
        powerlaw : PowerLaw component or None
            If None, it will try to guess the right component from the
            background components of the model

        """
        if powerlaw is None:
            for component in self._active_background_components:
                if not isinstance(component, components.PowerLaw):
                    # Other background components are skipped; returning
                    # here would hide a power law listed after them.
                    continue
                if powerlaw is not None:
                    messages.warning(
                        "There are more than two power law "
                        "background components defined in this model, "
                        "please use the powerlaw keyword to specify one"
                        " of them"
                    )
                    return
                powerlaw = component
            if powerlaw is None:  # No power law component
                return

        E1 = self._get_start_energy(E1)
        if E2 is None:
            E2 = self._get_first_ionization_edge_energy(start_energy=E1)
            if E2 is None:
                E2 = self.axis.axis[self.channel_switches][-1]
            else:
                E2 = E2 - preferences.EELS.preedge_safe_window_width

        if not powerlaw.estimate_parameters(self.spectrum, E1, E2, only_current=False):
            messages.warning(
                "The power law background parameters could not "
                "be estimated.\n"
                "Try choosing a different energy range for the estimation"
            )
Esempio n. 11
0
    def two_area_background_estimation(self, E1=None, E2=None):
        """
        Estimates the parameters of a power law background with the two
        area method.

        The estimated ``A`` and ``r`` values are written to the parameter
        maps of the first background component and ``self.charge()`` is
        called. A warning is issued if the estimation raises ValueError.

        Parameters
        ----------
        E1 : float or None
            Start of the estimation window. If None or lower than the first
            value of the axis selected by ``self.channel_switches``, that
            first value is used.
        E2 : float or None
            End of the estimation window. If None, it is set to the
            position of the first active edge that is not below E1 minus
            ``defaults.preedge_safe_window_width``, or to the last value of
            the selected axis when there is no such edge.
        """
        ea = self.axis.axis[self.channel_switches]
        if E1 is None or E1 < ea[0]:
            E1 = ea[0]
        if E2 is None:
            # Bounded search: when every edge is inactive or below E1 the
            # end of the axis is used instead of indexing past the end of
            # ``self.edges``.
            first_edge = next(
                (edge for edge in self.edges or ()
                 if not (edge.edge_position() < E1 or
                         edge.active is False)),
                None)
            if first_edge is None:
                E2 = ea[-1]
            else:
                E2 = first_edge.edge_position() - \
                    defaults.preedge_safe_window_width
        # Single parenthesised argument: valid both as a Python 2 print
        # statement and as a Python 3 function call.
        print(
            "Estimating the parameters of the background by the two area method")
        print("E1 = %s\t E2 = %s" % (E1, E2))

        try:
            estimation = utils.two_area_powerlaw_estimation(
                self.spectrum, E1, E2)
            bg = self._background_components[0]
            bg.A.map['is_set'][:] = True
            bg.r.map['is_set'][:] = True
            bg.r.map['values'] = estimation['r']
            bg.A.map['values'] = estimation['A']
            self.charge()
        except ValueError:
            messages.warning(
                "The power law background parameters could not be estimated\n"
                "Try choosing a different energy range for the estimation")
Esempio n. 12
0
    def two_area_background_estimation(self, E1=None, E2=None):
        """
        Estimates the parameters of a power law background with the two
        area method.

        On success the ``A`` and ``r`` parameter maps of the first
        background component are filled with the estimation and
        ``self.charge()`` is called. If the estimation raises ValueError a
        warning is issued instead.

        Parameters
        ----------
        E1 : float or None
            Lower limit of the estimation window. Replaced by the first
            value of the axis selected by ``self.channel_switches`` when it
            is None or lower than that value.
        E2 : float or None
            Upper limit of the estimation window. When None, it is derived
            from the first active edge that is not below E1 (its position
            minus ``defaults.preedge_safe_window_width``) or, if no such
            edge exists, from the last value of the selected axis.
        """
        ea = self.axis.axis[self.channel_switches]
        if E1 is None or E1 < ea[0]:
            E1 = ea[0]
        if E2 is None:
            # Walk the edges without running off the end of the list; the
            # end of the axis is the fallback when no edge is usable.
            usable_edge = None
            for edge in self.edges or ():
                if edge.edge_position() < E1 or edge.active is False:
                    continue
                usable_edge = edge
                break
            if usable_edge is None:
                E2 = ea[-1]
            else:
                E2 = usable_edge.edge_position() - \
                    defaults.preedge_safe_window_width
        # A single parenthesised argument prints the same text under both
        # Python 2 and Python 3.
        print(
            "Estimating the parameters of the background by the two area method")
        print("E1 = %s\t E2 = %s" % (E1, E2))

        try:
            estimation = utils.two_area_powerlaw_estimation(
                self.spectrum, E1, E2)
            bg = self._background_components[0]
            bg.A.map['is_set'][:] = True
            bg.r.map['is_set'][:] = True
            bg.r.map['values'] = estimation['r']
            bg.A.map['values'] = estimation['A']
            self.charge()
        except ValueError:
            messages.warning(
                "The power law background parameters could not be estimated\n"
                "Try choosing a different energy range for the estimation")
Esempio n. 13
0
def load(*filenames, **kwds):
    """
    Load potentially multiple supported file into an hyperspy structure
    Supported formats: netCDF, msa, Gatan dm3, Ripple (rpl+raw)
    FEI ser and emi and hdf5.

    If no file name is passed and the interactive mode is enabled, a window
    ui is raised to select the file.

    Parameters
    ----------
    *filenames : if multiple file names are passed in, they get aggregated to
    a Signal class that has members for each file, plus a data set that
    consists of stacked input files. That stack has one dimension more than
    the input files. All files must match in size, number of dimensions, and
    type/extension. A single file name containing ``*`` is expanded with
    ``glob`` and the sorted matches are aggregated in the same way.

    record_by : Str
        Manually set the way in which the data will be read. Possible values are
        'spectrum' or 'image'. Please note that most of the times it is better
        to leave Hyperspy to decide this.

    signal_type : Str
        Manually set the signal type of the data. Although only setting signal
        type to 'EELS' will currently change the way the data is loaded, it is
        good practice to set this parameter so it can be stored when saving the
        file. Please note that, if the signal_type is already defined in the
        file the information will be overriden without warning.

    Returns
    -------
    The object returned by ``load_single_file`` when a single file is
    loaded, an aggregate signal when several files are loaded, or None
    (after a warning) when there is nothing to load.

    Example usage:
        Loading a single file providing the signal type:
            d=load('file.dm3', signal_type = 'XPS')
        Loading a single file and overriding its default record_by:
            d=load('file.dm3', record_by='image')
        Loading multiple files:
            d=load('file1.dm3','file2.dm3')

    """

    if not filenames and \
            hyperspy.defaults_parser.preferences.General.interactive is True:
        load_ui = Load()
        load_ui.edit_traits()
        if load_ui.filename:
            filenames = (load_ui.filename,)
    if not filenames:
        messages.warning('No file provided to reader.')
        return None
    if len(filenames) == 1:
        if '*' not in filenames[0]:
            return load_single_file(filenames[0], **kwds)
        from glob import glob
        pattern = filenames[0]
        filenames = sorted(glob(pattern))
        if not filenames:
            # glob returns an empty list when nothing matches; without this
            # guard the aggregation below fails indexing its first object.
            messages.warning(
                'No file matches the pattern %s' % pattern)
            return None

    import hyperspy.signals.aggregate as agg
    objects = [
        load_single_file(filename, output_level=0, is_agg=True, **kwds)
        for filename in filenames]

    # The type of aggregate is decided from the first loaded object
    obj_type = objects[0].mapped_parameters.record_by
    if obj_type == 'image':
        if len(objects[0].data.shape) == 3:
            # feeding 3d objects creates cell stacks
            agg_sig = agg.AggregateCells(*objects)
        else:
            agg_sig = agg.AggregateImage(*objects)
    elif 'spectrum' in obj_type:
        agg_sig = agg.AggregateSpectrum(*objects)
    else:
        agg_sig = agg.Aggregate(*objects)
    if hyperspy.defaults_parser.preferences.General.plot_on_load is True:
        agg_sig.plot()
    return agg_sig
Esempio n. 14
0
    def decomposition(self, normalize_poissonian_noise=False,
                      algorithm='svd', output_dimension=None, centre=None,
                      auto_transpose=True, navigation_mask=None,
                      signal_mask=None, var_array=None, var_func=None,
                      polyfit=None, on_peaks=False, reproject=None,
                      **kwargs):
        """Decomposition with a choice of algorithms

        The results are stored in self.mva_results

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise

        algorithm : 'svd' | 'fast_svd' | 'sklearn_pca' | 'mlpca' |
            'fast_mlpca' | 'nmf' | 'sparse_pca' | 'mini_batch_sparse_pca'

        output_dimension : None or int
            number of components to keep/calculate

        centre : None | 'variables' | 'trials'
            If None no centring is applied. If 'variable' the centring will be
            performed in the variable axis. If 'trials', the centring will be
            performed in the 'trials' axis. It only has effect when using the
            svd or fast_svd algorithms

        auto_transpose : bool
            If True, automatically transposes the data to boost performance.
            Only has effect when using the svd or fast_svd algorithms.

        navigation_mask : boolean numpy array

        signal_mask : boolean numpy array

        var_array : numpy array
            Array of variance for the maximum likelihood PCA algorithm

        var_func : function or numpy array
            If function, it will apply it to the dataset to obtain the
            var_array. Alternatively, it can be an array with the
            coefficients of a polynomial.

        polyfit :

        on_peaks : bool
            If True, the decomposition is performed on
            ``self.mapped_parameters.peak_chars`` instead of ``self.data``.

        reproject : None | signal | navigation | both
            If not None, the results of the decomposition will be projected in
            the selected masked area.

        **kwargs
            Passed to the constructor of the sklearn estimator when using
            the 'sklearn_pca', 'nmf', 'sparse_pca' or
            'mini_batch_sparse_pca' algorithms.

        Returns
        -------
        False if the algorithm is not recognised, None otherwise.

        See also
        --------
        plot_decomposition_factors, plot_decomposition_scores, plot_lev

        """
        # backup the original data
        if on_peaks:
            if hasattr(self.mapped_parameters, 'peak_chars'):
                self._data_before_treatments = \
                    self.mapped_parameters.peak_chars.copy()
            else:
                # NOTE(review): execution continues after this message; the
                # later access to peak_chars is expected to fail - confirm
                # whether an early return was intended.
                # Single parenthesised argument: valid in Python 2 and 3.
                print("No peak characteristics found.  You must run the \n"
                      "                         peak_char_stack function to obtain these before \n"
                      "                         you can run PCA or ICA on them.")
        else:
            self._data_before_treatments = self.data.copy()

        if algorithm == 'mlpca':
            if normalize_poissonian_noise is True:
                messages.warning(
                    "It makes no sense to do normalize_poissonian_noise with "
                    "the MLPCA algorithm. Therefore, "
                    "normalize_poissonian_noise is set to False")
                normalize_poissonian_noise = False
            if output_dimension is None:
                messages.warning_exit("With the mlpca algorithm the "
                                      "output_dimension must be expecified")

        # Apply pre-treatments
        # Transform the data in a line spectrum
        self._unfolded4decomposition = self.unfold_if_multidim()
        if hasattr(navigation_mask, 'ravel'):
            navigation_mask = navigation_mask.ravel()

        if hasattr(signal_mask, 'ravel'):
            signal_mask = signal_mask.ravel()

        # Normalize the poissonian noise
        # TODO this function can change the masks and this can cause
        # problems when reprojecting
        if normalize_poissonian_noise is True:
            if reproject is None:
                navigation_mask, signal_mask = \
                    self.normalize_poissonian_noise(
                        navigation_mask=navigation_mask,
                        signal_mask=signal_mask,
                        return_masks=True)
            elif reproject == 'both':
                _, _ = \
                    self.normalize_poissonian_noise(return_masks=True)
            elif reproject == 'navigation':
                _, signal_mask = \
                    self.normalize_poissonian_noise(return_masks=True,
                                                    signal_mask=signal_mask,)
            elif reproject == 'signal':
                navigation_mask, _ = \
                    self.normalize_poissonian_noise(
                        return_masks=True,
                        navigation_mask=navigation_mask,)

        messages.information('Performing decomposition analysis')
        if on_peaks:
            dc = self.mapped_parameters.peak_chars
        else:
            # The data must be transposed both for Images and Spectra
            dc = self.data

        # set the output target (peak results or not?)
        target = self._get_target(on_peaks)

        # Transform the None masks in slices to get the right behaviour
        if navigation_mask is None:
            navigation_mask = slice(None)
        if signal_mask is None:
            signal_mask = slice(None)

        # Reset the explained_variance which is not set by all the algorithms
        explained_variance = None
        explained_variance_ratio = None
        mean = None

        if algorithm == 'svd':
            factors, scores, explained_variance, mean = svd_pca(
                dc[:, signal_mask][navigation_mask, :], centre=centre,
                auto_transpose=auto_transpose)

        elif algorithm == 'fast_svd':
            factors, scores, explained_variance, mean = svd_pca(
                dc[:, signal_mask][navigation_mask, :],
                fast=True, output_dimension=output_dimension, centre=centre,
                auto_transpose=auto_transpose)

        elif algorithm == 'sklearn_pca':
            sk = sklearn.decomposition.PCA(**kwargs)
            sk.n_components = output_dimension
            scores = sk.fit_transform((dc[:, signal_mask][navigation_mask, :]))
            factors = sk.components_.T
            explained_variance = sk.explained_variance_
            mean = sk.mean_
            centre = 'trials'

        elif algorithm == 'nmf':
            sk = sklearn.decomposition.NMF(**kwargs)
            sk.n_components = output_dimension
            scores = sk.fit_transform((dc[:, signal_mask][navigation_mask, :]))
            factors = sk.components_.T

        elif algorithm == 'sparse_pca':
            sk = sklearn.decomposition.SparsePCA(output_dimension, **kwargs)
            scores = sk.fit_transform(dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T

        elif algorithm == 'mini_batch_sparse_pca':
            sk = sklearn.decomposition.MiniBatchSparsePCA(output_dimension,
                                                          **kwargs)
            scores = sk.fit_transform(dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T

        elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
            print("Performing the MLPCA training")
            if output_dimension is None:
                messages.warning_exit(
                    "For MLPCA it is mandatory to define the output_dimension")
            if var_array is None and var_func is None:
                messages.information('No variance array provided.'
                                     'Supposing poissonian data')
                var_array = dc[:, signal_mask][navigation_mask, :]

            if var_array is not None and var_func is not None:
                messages.warning_exit(
                    "You have defined both the var_func and var_array keywords"
                    "Please, define just one of them")
            if var_func is not None:
                # NOTE(review): the masks are applied here in the opposite
                # axis order to the rest of the method - confirm.
                if hasattr(var_func, '__call__'):
                    var_array = var_func(
                        dc[signal_mask, ...][:, navigation_mask])
                else:
                    # NOTE(review): the polynomial coefficients are read
                    # from `polyfit`, not from `var_func` as the docstring
                    # suggests - confirm which one is intended.
                    try:
                        var_array = np.polyval(polyfit, dc[signal_mask,
                                                          navigation_mask])
                    except Exception:
                        messages.warning_exit(
                            'var_func must be either a function or an array'
                            'defining the coefficients of a polynom')
            fast = algorithm != 'mlpca'
            U, S, V, Sobj, ErrFlag = mlpca(
                dc[:, signal_mask][navigation_mask, :],
                var_array, output_dimension, fast=fast)
            scores = U * S
            factors = V
            explained_variance_ratio = S ** 2 / Sobj
            explained_variance = S ** 2 / len(factors)
        else:
            messages.information('Error: Algorithm not recognised. '
                                 'Nothing done')
            return False

        # We must calculate the ratio here because otherwise the sum information
        # can be lost if the user call crop_decomposition_dimension
        if explained_variance is not None and explained_variance_ratio is None:
            explained_variance_ratio = \
                explained_variance / explained_variance.sum()

        # Store the results in mva_results
        target.factors = factors
        target.scores = scores
        target.explained_variance = explained_variance
        target.explained_variance_ratio = explained_variance_ratio
        target.decomposition_algorithm = algorithm
        target.poissonian_noise_normalized = \
            normalize_poissonian_noise
        target.output_dimension = output_dimension
        target.unfolded = self._unfolded4decomposition
        target.centre = centre
        target.mean = mean

        if output_dimension and factors.shape[1] != output_dimension:
            target.crop_decomposition_dimension(output_dimension)

        # Delete the unmixing information, because it'll refer to a previous
        # decompositions
        target.unmixing_matrix = None
        target.ica_algorithm = None

        if self._unfolded4decomposition is True:
            target.original_shape = self._shape_before_unfolding

        # Reproject
        if mean is None:
            mean = 0
        if reproject in ('navigation', 'both'):
            if algorithm not in ('nmf', 'sparse_pca', 'mini_batch_sparse_pca'):
                scores_ = np.dot(dc[:, signal_mask] - mean, factors)
            else:
                scores_ = sk.transform(dc[:, signal_mask])
            target.scores = scores_
        if reproject in ('signal', 'both'):
            if algorithm not in ('nmf', 'sparse_pca', 'mini_batch_sparse_pca'):
                factors = np.dot(np.linalg.pinv(scores),
                                 dc[navigation_mask, :] - mean).T
                target.factors = factors
            else:
                messages.information("Reprojecting the signal is not yet "
                                     "supported for this algorithm")
                if reproject == 'both':
                    reproject = 'signal'
                else:
                    reproject = None

        # Rescale the results if the noise was normalized
        # (_root_bH and _root_aG are presumably set by
        # normalize_poissonian_noise - verify)
        if normalize_poissonian_noise is True:
            target.factors[:] *= self._root_bH.T
            target.scores[:] *= self._root_aG

        # Set the pixels that were not processed to nan
        # (the masks are compared element-wise with ``==`` on purpose)
        if not isinstance(signal_mask, slice):
            target.signal_mask = signal_mask
            if reproject not in ('both', 'signal'):
                factors = np.zeros((dc.shape[-1], target.factors.shape[1]))
                factors[signal_mask == True, :] = target.factors
                factors[signal_mask == False, :] = np.nan
                target.factors = factors
        if not isinstance(navigation_mask, slice):
            target.navigation_mask = navigation_mask
            if reproject not in ('both', 'navigation'):
                scores = np.zeros((dc.shape[0], target.scores.shape[1]))
                scores[navigation_mask == True, :] = target.scores
                scores[navigation_mask == False, :] = np.nan
                target.scores = scores

        # undo any pre-treatments
        self.undo_treatments(on_peaks)

        if self._unfolded4decomposition is True:
            self.fold()
            # Reset the flag once the data is folded back (this must be an
            # assignment; a bare ``is False`` comparison has no effect).
            self._unfolded4decomposition = False
Esempio n. 15
0
    def decomposition(self,
                      normalize_poissonian_noise=False,
                      algorithm='svd',
                      output_dimension=None,
                      centre=None,
                      auto_transpose=True,
                      navigation_mask=None,
                      signal_mask=None,
                      var_array=None,
                      var_func=None,
                      polyfit=None,
                      reproject=None,
                      **kwargs):
        """Decomposition with a choice of algorithms

        The results are stored in self.learning_results

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise. It is
            forced to False (with a warning) for the 'mlpca' algorithm.

        algorithm : 'svd' | 'fast_svd' | 'sklearn_pca' | 'mlpca' |
            'fast_mlpca' | 'nmf' | 'sparse_pca' | 'mini_batch_sparse_pca'

        output_dimension : None or int
            number of components to keep/calculate. Mandatory for the
            'mlpca' and 'fast_mlpca' algorithms.

        centre : None | 'variables' | 'trials'
            If None no centring is applied. If 'variable' the centring will be
            performed in the variable axis. If 'trials', the centring will be
            performed in the 'trials' axis. It only has effect when using the
            svd or fast_svd algorithms

        auto_transpose : bool
            If True, automatically transposes the data to boost performance.
            Only has effect when using the svd or fast_svd algorithms.

        navigation_mask : boolean numpy array
            The navigation locations marked as True are not used in the
            decomposition.

        signal_mask : boolean numpy array
            The signal locations marked as True are not used in the
            decomposition.

        var_array : numpy array
            Array of variance for the maximum likelihood PCA algorithm.
            Mutually exclusive with var_func.

        var_func : function or numpy array
            If function, it will apply it to the (masked) dataset to obtain
            the var_array. Alternatively, it can be an array with the
            coefficients of a polynomial that is evaluated on the (masked)
            dataset.

        polyfit : None or numpy array
            Legacy keyword. If given, it is used as the polynomial
            coefficients instead of a non-callable var_func.

        reproject : None | signal | navigation | both
            If not None, the results of the decomposition will be projected in
            the selected masked area.

        **kwargs
            Passed to the constructor of the scikit-learn estimator when
            algorithm is 'sklearn_pca', 'nmf', 'sparse_pca' or
            'mini_batch_sparse_pca'.

        Raises
        ------
        AttributeError
            If the navigation size of the dataset is smaller than 2.
        ValueError
            If the algorithm is not recognised, if output_dimension is
            missing for the MLPCA algorithms, if both var_array and var_func
            are given or if var_func is neither a function nor usable as
            polynomial coefficients.
        ImportError
            If a scikit-learn algorithm is requested but scikit-learn is not
            installed.

        See also
        --------
        plot_decomposition_factors, plot_decomposition_loadings, plot_lev

        """
        # Algorithms for which only the estimator's own ``transform`` can be
        # used to reproject (no linear reprojection of the signal space).
        no_linear_reprojection = ('nmf', 'sparse_pca',
                                  'mini_batch_sparse_pca')

        # Check if it is the wrong data type
        if self.data.dtype.char not in ['e', 'f', 'd']:  # If not float
            messages.warning(
                'To perform a decomposition the data must be of the float '
                'type. You can change the type using the change_dtype method'
                ' e.g. s.change_dtype(\'float64\')\n'
                'Nothing done.')
            return

        if self.axes_manager.navigation_size < 2:
            raise AttributeError("It is not possible to decompose a dataset "
                                 "with navigation_size < 2")
        # backup the original data
        self._data_before_treatments = self.data.copy()

        if algorithm == 'mlpca':
            if normalize_poissonian_noise is True:
                messages.warning(
                    "It makes no sense to do normalize_poissonian_noise with "
                    "the MLPCA algorithm. Therefore, "
                    "normalize_poissonian_noise is set to False")
                normalize_poissonian_noise = False
            if output_dimension is None:
                raise ValueError("With the mlpca algorithm the "
                                 "output_dimension must be expecified")

        # Apply pre-treatments
        # Transform the data in a line spectrum
        self._unfolded4decomposition = self.unfold()
        try:
            if hasattr(navigation_mask, 'ravel'):
                navigation_mask = navigation_mask.ravel()

            if hasattr(signal_mask, 'ravel'):
                signal_mask = signal_mask.ravel()

            # Normalize the poissonian noise
            # TODO this function can change the masks and this can cause
            # problems when reprojecting
            if normalize_poissonian_noise is True:
                self.normalize_poissonian_noise(
                    navigation_mask=navigation_mask,
                    signal_mask=signal_mask,)
            messages.information('Performing decomposition analysis')
            # The rest of the code assumes that the first data axis
            # is the navigation axis. We transpose the data if that is not the
            # case.
            dc = (self.data if self.axes_manager[0].index_in_array == 0
                  else self.data.T)
            # set the output target (peak results or not?)
            target = self.learning_results

            # Transform the None masks in slices to get the right behaviour
            if navigation_mask is None:
                navigation_mask = slice(None)
            else:
                navigation_mask = ~navigation_mask
            if signal_mask is None:
                signal_mask = slice(None)
            else:
                signal_mask = ~signal_mask

            # WARNING: signal_mask and navigation_mask values are now their
            # negatives i.e. True -> False and viceversa. However, the
            # stored value (at the end of the method) coincides with the
            # input masks

            # Reset the explained_variance which is not set by all the
            # algorithms
            explained_variance = None
            explained_variance_ratio = None
            mean = None

            if algorithm == 'svd':
                factors, loadings, explained_variance, mean = svd_pca(
                    dc[:, signal_mask][navigation_mask, :], centre=centre,
                    auto_transpose=auto_transpose)

            elif algorithm == 'fast_svd':
                factors, loadings, explained_variance, mean = svd_pca(
                    dc[:, signal_mask][navigation_mask, :],
                    fast=True,
                    output_dimension=output_dimension,
                    centre=centre,
                    auto_transpose=auto_transpose)

            elif algorithm == 'sklearn_pca':
                if import_sklearn.sklearn_installed is False:
                    raise ImportError(
                        'sklearn is not installed. Nothing done')
                sk = import_sklearn.sklearn.decomposition.PCA(**kwargs)
                sk.n_components = output_dimension
                loadings = sk.fit_transform((
                    dc[:, signal_mask][navigation_mask, :]))
                factors = sk.components_.T
                explained_variance = sk.explained_variance_
                mean = sk.mean_
                centre = 'trials'

            elif algorithm == 'nmf':
                if import_sklearn.sklearn_installed is False:
                    raise ImportError(
                        'sklearn is not installed. Nothing done')
                sk = import_sklearn.sklearn.decomposition.NMF(**kwargs)
                sk.n_components = output_dimension
                loadings = sk.fit_transform((
                    dc[:, signal_mask][navigation_mask, :]))
                factors = sk.components_.T

            elif algorithm == 'sparse_pca':
                if import_sklearn.sklearn_installed is False:
                    raise ImportError(
                        'sklearn is not installed. Nothing done')
                sk = import_sklearn.sklearn.decomposition.SparsePCA(
                    output_dimension, **kwargs)
                loadings = sk.fit_transform(
                    dc[:, signal_mask][navigation_mask, :])
                factors = sk.components_.T

            elif algorithm == 'mini_batch_sparse_pca':
                if import_sklearn.sklearn_installed is False:
                    raise ImportError(
                        'sklearn is not installed. Nothing done')
                sk = import_sklearn.sklearn.decomposition.MiniBatchSparsePCA(
                    output_dimension, **kwargs)
                loadings = sk.fit_transform(
                    dc[:, signal_mask][navigation_mask, :])
                factors = sk.components_.T

            elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
                # Parenthesised form prints the same text on Python 2 and 3
                print("Performing the MLPCA training")
                if output_dimension is None:
                    raise ValueError(
                        "For MLPCA it is mandatory to define the "
                        "output_dimension")
                if var_array is None and var_func is None:
                    messages.information('No variance array provided. '
                                         'Supposing poissonian data')
                    var_array = dc[:, signal_mask][navigation_mask, :]

                if var_array is not None and var_func is not None:
                    raise ValueError(
                        "You have defined both the var_func and var_array "
                        "keywords. "
                        "Please, define just one of them")
                if var_func is not None:
                    # The variance must have the same layout as the data
                    # handed to mlpca: navigation on the first axis, signal
                    # on the second one, both masked.
                    if callable(var_func):
                        var_array = var_func(
                            dc[:, signal_mask][navigation_mask, :])
                    else:
                        # A non-callable var_func holds the polynomial
                        # coefficients; an explicit polyfit still takes
                        # precedence for backward compatibility.
                        coefficients = (var_func if polyfit is None
                                        else polyfit)
                        try:
                            var_array = np.polyval(
                                coefficients,
                                dc[:, signal_mask][navigation_mask, :])
                        except Exception:
                            raise ValueError(
                                'var_func must be either a function or an '
                                'array defining the coefficients of a polynom')
                fast = algorithm != 'mlpca'
                U, S, V, Sobj, ErrFlag = mlpca(
                    dc[:, signal_mask][navigation_mask, :],
                    var_array, output_dimension, fast=fast)
                loadings = U * S
                factors = V
                explained_variance_ratio = S ** 2 / Sobj
                explained_variance = S ** 2 / len(factors)
            else:
                raise ValueError('Algorithm not recognised. '
                                 'Nothing done')

            # We must calculate the ratio here because otherwise the sum
            # information can be lost if the user call
            # crop_decomposition_dimension
            if explained_variance is not None and \
                    explained_variance_ratio is None:
                explained_variance_ratio = \
                    explained_variance / explained_variance.sum()

            # Store the results in learning_results
            target.factors = factors
            target.loadings = loadings
            target.explained_variance = explained_variance
            target.explained_variance_ratio = explained_variance_ratio
            target.decomposition_algorithm = algorithm
            target.poissonian_noise_normalized = \
                normalize_poissonian_noise
            target.output_dimension = output_dimension
            target.unfolded = self._unfolded4decomposition
            target.centre = centre
            target.mean = mean

            if output_dimension and factors.shape[1] != output_dimension:
                target.crop_decomposition_dimension(output_dimension)

            # Delete the unmixing information, because it'll refer to a
            # previous decomposition
            target.unmixing_matrix = None
            target.bss_algorithm = None

            if self._unfolded4decomposition is True:
                folding = \
                    self.metadata._HyperSpy.Folding
                target.original_shape = folding.original_shape

            # Reproject
            if mean is None:
                mean = 0
            if reproject in ('navigation', 'both'):
                if algorithm not in no_linear_reprojection:
                    loadings_ = np.dot(dc[:, signal_mask] - mean, factors)
                else:
                    loadings_ = sk.transform(dc[:, signal_mask])
                target.loadings = loadings_
            if reproject in ('signal', 'both'):
                if algorithm not in no_linear_reprojection:
                    factors = np.dot(np.linalg.pinv(loadings),
                                     dc[navigation_mask, :] - mean).T
                    target.factors = factors
                else:
                    messages.information("Reprojecting the signal is not yet "
                                         "supported for this algorithm")
                    # The signal was NOT reprojected: record what actually
                    # happened so that the NaN-padding below treats the
                    # factors as masked and the loadings as already
                    # full-sized.
                    if reproject == 'both':
                        reproject = 'navigation'
                    else:
                        reproject = None

            # Rescale the results if the noise was normalized
            if normalize_poissonian_noise is True:
                target.factors[:] *= self._root_bH.T
                target.loadings[:] *= self._root_aG

            # Set the pixels that were not processed to nan
            if not isinstance(signal_mask, slice):
                # Store the (inverted, as inputed) signal mask
                target.signal_mask = ~signal_mask.reshape(
                    self.axes_manager._signal_shape_in_array)
                if reproject not in ('both', 'signal'):
                    factors = np.zeros((dc.shape[-1], target.factors.shape[1]))
                    factors[signal_mask, :] = target.factors
                    factors[~signal_mask, :] = np.nan
                    target.factors = factors
            if not isinstance(navigation_mask, slice):
                # Store the (inverted, as inputed) navigation mask
                target.navigation_mask = ~navigation_mask.reshape(
                    self.axes_manager._navigation_shape_in_array)
                if reproject not in ('both', 'navigation'):
                    loadings = np.zeros(
                        (dc.shape[0], target.loadings.shape[1]))
                    loadings[navigation_mask, :] = target.loadings
                    loadings[~navigation_mask, :] = np.nan
                    target.loadings = loadings
        finally:
            # undo any pre-treatments, even if the decomposition failed
            self.undo_treatments()

            if self._unfolded4decomposition is True:
                self.fold()
                # Reset the flag (the original compared instead of assigning)
                self._unfolded4decomposition = False
Esempio n. 16
0
# Message shown when none of the supported netCDF bindings can be imported.
# The parentheses are required: without them only the first fragment is
# assigned and the other two are discarded as bare expression statements.
no_netcdf_message = (
    'Warning! In order to enjoy the netCDF Read/Write feature, '
    'at least one of this packages must be installed: '
    'python-pupynere, python-netcdf or python-netcdf4')

# Try the available netCDF bindings in order of preference. ``Exception`` is
# caught (rather than only ImportError) to keep the best-effort behaviour for
# broken installations, while no longer swallowing KeyboardInterrupt or
# SystemExit as the previous bare ``except:`` clauses did.
try:
    from netCDF4 import Dataset
    which_netcdf = 'netCDF4'
except Exception:
    try:
        from netCDF3 import Dataset
        which_netcdf = 'netCDF3'
    except Exception:
        try:
            from Scientific.IO.NetCDF import NetCDFFile as Dataset
            which_netcdf = 'Scientific Python'
        except Exception:
            # No backend available: keep the names defined so that later
            # checks do not fail with a NameError.
            Dataset = None
            which_netcdf = None
            messages.warning(no_netcdf_message)

# Plugin characteristics
# ----------------------
format_name = 'netCDF'
description = ''
full_suport = True
file_extensions = ('nc', 'NC')
# Index into ``file_extensions``
default_extension = 0

# Reading features
reads_images = True
reads_spectrum = True
reads_spectrum_image = True
# Writing features
writes_images = False
Esempio n. 17
0
    def decomposition(self,
                      normalize_poissonian_noise=False,
                      algorithm='svd',
                      output_dimension=None,
                      centre=None,
                      auto_transpose=True,
                      navigation_mask=None,
                      signal_mask=None,
                      var_array=None,
                      var_func=None,
                      polyfit=None,
                      reproject=None,
                      **kwargs):
        """Decomposition with a choice of algorithms

        The results are stored in self.learning_results

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise. It is
            forced to False (with a warning) for the 'mlpca' algorithm.

        algorithm : 'svd' | 'fast_svd' | 'sklearn_pca' | 'mlpca' |
            'fast_mlpca' | 'nmf' | 'sparse_pca' | 'mini_batch_sparse_pca'

        output_dimension : None or int
            number of components to keep/calculate. Mandatory for the
            'mlpca' and 'fast_mlpca' algorithms.

        centre : None | 'variables' | 'trials'
            If None no centring is applied. If 'variable' the centring will be
            performed in the variable axis. If 'trials', the centring will be
            performed in the 'trials' axis. It only has effect when using the
            svd or fast_svd algorithms

        auto_transpose : bool
            If True, automatically transposes the data to boost performance.
            Only has effect when using the svd or fast_svd algorithms.

        navigation_mask : boolean numpy array
            The navigation locations marked as True are not used in the
            decomposition.

        signal_mask : boolean numpy array
            The signal locations marked as True are not used in the
            decomposition.

        var_array : numpy array
            Array of variance for the maximum likelihood PCA algorithm.
            Mutually exclusive with var_func.

        var_func : function or numpy array
            If function, it will apply it to the (masked) dataset to obtain
            the var_array. Alternatively, it can be an array with the
            coefficients of a polynomial that is evaluated on the (masked)
            dataset.

        polyfit : None or numpy array
            Legacy keyword. If given, it is used as the polynomial
            coefficients instead of a non-callable var_func.

        reproject : None | signal | navigation | both
            If not None, the results of the decomposition will be projected in
            the selected masked area.

        **kwargs
            Passed to the constructor of the scikit-learn estimator when
            algorithm is 'sklearn_pca', 'nmf', 'sparse_pca' or
            'mini_batch_sparse_pca'.

        Raises
        ------
        ValueError
            If the algorithm is not recognised.
        ImportError
            If a scikit-learn algorithm is requested but scikit-learn is not
            installed.

        See also
        --------
        plot_decomposition_factors, plot_decomposition_loadings, plot_lev

        """
        # Algorithms for which only the estimator's own ``transform`` can be
        # used to reproject (no linear reprojection of the signal space).
        no_linear_reprojection = ('nmf', 'sparse_pca',
                                  'mini_batch_sparse_pca')

        # Check if it is the wrong data type
        if self.data.dtype.char not in ['e', 'f', 'd']:  # If not float
            messages.warning(
                'To perform a decomposition the data must be of the float type.'
                ' You can change the type using the change_dtype method'
                ' e.g. s.change_dtype(\'float64\')\n'
                'Nothing done.')
            return
        # backup the original data
        self._data_before_treatments = self.data.copy()

        if algorithm == 'mlpca':
            if normalize_poissonian_noise is True:
                messages.warning(
                    "It makes no sense to do normalize_poissonian_noise with "
                    "the MLPCA algorithm. Therefore, "
                    "normalize_poissonian_noise is set to False")
                normalize_poissonian_noise = False
            if output_dimension is None:
                messages.warning_exit("With the mlpca algorithm the "
                                      "output_dimension must be expecified")

        # Apply pre-treatments
        # Transform the data in a line spectrum
        self._unfolded4decomposition = self.unfold_if_multidim()
        try:
            if hasattr(navigation_mask, 'ravel'):
                navigation_mask = navigation_mask.ravel()

            if hasattr(signal_mask, 'ravel'):
                signal_mask = signal_mask.ravel()

            # Normalize the poissonian noise
            # TODO this function can change the masks and this can cause
            # problems when reprojecting
            if normalize_poissonian_noise is True:
                self.normalize_poissonian_noise(
                    navigation_mask=navigation_mask,
                    signal_mask=signal_mask,)
            messages.information('Performing decomposition analysis')

            dc = self.data
            # set the output target (peak results or not?)
            target = self.learning_results

            # Transform the None masks in slices to get the right behaviour
            if navigation_mask is None:
                navigation_mask = slice(None)
            else:
                navigation_mask = ~navigation_mask
            if signal_mask is None:
                signal_mask = slice(None)
            else:
                signal_mask = ~signal_mask

            # WARNING: signal_mask and navigation_mask values are now their
            # negatives i.e. True -> False and viceversa. However, the
            # stored value (at the end of the method) coincides with the
            # input masks

            # Reset the explained_variance which is not set by all the
            # algorithms
            explained_variance = None
            explained_variance_ratio = None
            mean = None

            if algorithm == 'svd':
                factors, loadings, explained_variance, mean = svd_pca(
                    dc[:, signal_mask][navigation_mask, :], centre=centre,
                    auto_transpose=auto_transpose)

            elif algorithm == 'fast_svd':
                factors, loadings, explained_variance, mean = svd_pca(
                    dc[:, signal_mask][navigation_mask, :],
                    fast=True,
                    output_dimension=output_dimension,
                    centre=centre,
                    auto_transpose=auto_transpose)

            elif algorithm == 'sklearn_pca':
                if sklearn_installed is False:
                    raise ImportError(
                        'sklearn is not installed. Nothing done')
                sk = sklearn.decomposition.PCA(**kwargs)
                sk.n_components = output_dimension
                loadings = sk.fit_transform((
                    dc[:, signal_mask][navigation_mask, :]))
                factors = sk.components_.T
                explained_variance = sk.explained_variance_
                mean = sk.mean_
                centre = 'trials'

            elif algorithm == 'nmf':
                if sklearn_installed is False:
                    raise ImportError(
                        'sklearn is not installed. Nothing done')
                sk = sklearn.decomposition.NMF(**kwargs)
                sk.n_components = output_dimension
                loadings = sk.fit_transform((
                    dc[:, signal_mask][navigation_mask, :]))
                factors = sk.components_.T

            elif algorithm == 'sparse_pca':
                if sklearn_installed is False:
                    raise ImportError(
                        'sklearn is not installed. Nothing done')
                sk = sklearn.decomposition.SparsePCA(
                    output_dimension, **kwargs)
                loadings = sk.fit_transform(
                    dc[:, signal_mask][navigation_mask, :])
                factors = sk.components_.T

            elif algorithm == 'mini_batch_sparse_pca':
                if sklearn_installed is False:
                    raise ImportError(
                        'sklearn is not installed. Nothing done')
                sk = sklearn.decomposition.MiniBatchSparsePCA(
                    output_dimension, **kwargs)
                loadings = sk.fit_transform(
                    dc[:, signal_mask][navigation_mask, :])
                factors = sk.components_.T

            elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
                # Parenthesised form prints the same text on Python 2 and 3
                print("Performing the MLPCA training")
                if output_dimension is None:
                    messages.warning_exit(
                        "For MLPCA it is mandatory to define the "
                        "output_dimension")
                if var_array is None and var_func is None:
                    messages.information('No variance array provided. '
                                         'Supposing poissonian data')
                    var_array = dc[:, signal_mask][navigation_mask, :]

                if var_array is not None and var_func is not None:
                    messages.warning_exit(
                        "You have defined both the var_func and var_array "
                        "keywords. "
                        "Please, define just one of them")
                if var_func is not None:
                    # The variance must have the same layout as the data
                    # handed to mlpca: navigation on the first axis, signal
                    # on the second one, both masked.
                    if callable(var_func):
                        var_array = var_func(
                            dc[:, signal_mask][navigation_mask, :])
                    else:
                        # A non-callable var_func holds the polynomial
                        # coefficients; an explicit polyfit still takes
                        # precedence for backward compatibility.
                        coefficients = (var_func if polyfit is None
                                        else polyfit)
                        try:
                            var_array = np.polyval(
                                coefficients,
                                dc[:, signal_mask][navigation_mask, :])
                        except Exception:
                            messages.warning_exit(
                                'var_func must be either a function or an '
                                'array defining the coefficients of a polynom')
                fast = algorithm != 'mlpca'
                U, S, V, Sobj, ErrFlag = mlpca(
                    dc[:, signal_mask][navigation_mask, :],
                    var_array, output_dimension, fast=fast)
                loadings = U * S
                factors = V
                explained_variance_ratio = S ** 2 / Sobj
                explained_variance = S ** 2 / len(factors)
            else:
                raise ValueError('Algorithm not recognised. '
                                 'Nothing done')

            # We must calculate the ratio here because otherwise the sum
            # information can be lost if the user call
            # crop_decomposition_dimension
            if explained_variance is not None and \
                    explained_variance_ratio is None:
                explained_variance_ratio = \
                    explained_variance / explained_variance.sum()

            # Store the results in learning_results
            target.factors = factors
            target.loadings = loadings
            target.explained_variance = explained_variance
            target.explained_variance_ratio = explained_variance_ratio
            target.decomposition_algorithm = algorithm
            target.poissonian_noise_normalized = \
                normalize_poissonian_noise
            target.output_dimension = output_dimension
            target.unfolded = self._unfolded4decomposition
            target.centre = centre
            target.mean = mean

            if output_dimension and factors.shape[1] != output_dimension:
                target.crop_decomposition_dimension(output_dimension)

            # Delete the unmixing information, because it'll refer to a
            # previous decomposition
            target.unmixing_matrix = None
            target.bss_algorithm = None

            if self._unfolded4decomposition is True:
                folding = \
                    self.mapped_parameters._internal_parameters.folding
                target.original_shape = folding.original_shape

            # Reproject
            if mean is None:
                mean = 0
            if reproject in ('navigation', 'both'):
                if algorithm not in no_linear_reprojection:
                    loadings_ = np.dot(dc[:, signal_mask] - mean, factors)
                else:
                    loadings_ = sk.transform(dc[:, signal_mask])
                target.loadings = loadings_
            if reproject in ('signal', 'both'):
                if algorithm not in no_linear_reprojection:
                    factors = np.dot(np.linalg.pinv(loadings),
                                     dc[navigation_mask, :] - mean).T
                    target.factors = factors
                else:
                    messages.information("Reprojecting the signal is not yet "
                                         "supported for this algorithm")
                    # The signal was NOT reprojected: record what actually
                    # happened so that the NaN-padding below treats the
                    # factors as masked and the loadings as already
                    # full-sized.
                    if reproject == 'both':
                        reproject = 'navigation'
                    else:
                        reproject = None

            # Rescale the results if the noise was normalized
            if normalize_poissonian_noise is True:
                target.factors[:] *= self._root_bH.T
                target.loadings[:] *= self._root_aG

            # Set the pixels that were not processed to nan
            if not isinstance(signal_mask, slice):
                # Store the (inverted, as inputed) signal mask
                target.signal_mask = ~signal_mask.reshape(
                    self.axes_manager._signal_shape_in_array)
                if reproject not in ('both', 'signal'):
                    factors = np.zeros((dc.shape[-1], target.factors.shape[1]))
                    factors[signal_mask, :] = target.factors
                    factors[~signal_mask, :] = np.nan
                    target.factors = factors
            if not isinstance(navigation_mask, slice):
                # Store the (inverted, as inputed) navigation mask
                target.navigation_mask = ~navigation_mask.reshape(
                    self.axes_manager._navigation_shape_in_array)
                if reproject not in ('both', 'navigation'):
                    loadings = np.zeros(
                        (dc.shape[0], target.loadings.shape[1]))
                    loadings[navigation_mask, :] = target.loadings
                    loadings[~navigation_mask, :] = np.nan
                    target.loadings = loadings
        finally:
            # undo any pre-treatments, even if the decomposition failed
            self.undo_treatments()

            if self._unfolded4decomposition is True:
                self.fold()
                # Reset the flag (the original compared instead of assigning)
                self._unfolded4decomposition = False
Esempio n. 18
0
    def principal_components_analysis(self, normalize_poissonian_noise = False,
    algorithm = 'svd', output_dimension = None, navigation_mask = None,
    signal_mask = None, center = False, variance2one = False, var_array = None,
    var_func = None, polyfit = None, on_peaks=False):
        """Principal components analysis.

        The results are stored as attributes (``pc``, ``v``, ``V``, ...) of
        the object returned by ``self._get_target(on_peaks)`` (the original
        documentation names it ``self.mva_results``).

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise
        algorithm : {'svd', 'fast_svd', 'mlpca', 'fast_mlpca', 'mdp', 'NIPALS'}
        output_dimension : None or int
            Number of principal components to keep. Mandatory for the
            'mlpca' and 'fast_mlpca' algorithms.
        navigation_mask : boolean numpy array
            Entries that are False are not processed and are set to NaN in
            the stored ``v``.
        signal_mask : boolean numpy array
        center : bool
            Perform energy centering before PCA
        variance2one : bool
            Perform whitening before PCA
        var_array : numpy array
            Array of variance for the maximum likelihood PCA algorithm
        var_func : function or numpy array
            If function, it will be applied to the dataset to obtain the
            var_array. Alternatively, it can be an array with the
            coefficients of a polynomial.
        polyfit : None or numpy array
            Polynomial coefficients handed to ``np.polyval`` when var_func is
            not callable. Kept for backward compatibility; when None, the
            coefficients are taken from var_func.
        on_peaks : bool
            If True, the analysis runs on ``self.peak_chars`` instead of
            ``self.data``.

        Raises
        ------
        ValueError
            If ``algorithm`` is not one of the supported names.

        See also
        --------
        plot_principal_components, plot_principal_components_maps, plot_lev

        """
        # Reject unknown algorithms before touching the data. Otherwise the
        # failure only shows up as a NameError after the data has already
        # been pre-treated and unfolded.
        known_algorithms = ('svd', 'fast_svd', 'mlpca', 'fast_mlpca',
                            'mdp', 'NIPALS')
        if algorithm not in known_algorithms:
            raise ValueError(
                "Unknown PCA algorithm %r, it must be one of: %s"
                % (algorithm, ', '.join(known_algorithms)))

        # backup the original data
        if on_peaks:
            self._data_before_treatments = self.peak_chars.copy()
        else:
            self._data_before_treatments = self.data.copy()
        # Check for conflicting options and correct them when possible
        if algorithm in ('mdp', 'NIPALS') and center is False:
            print("""
            The PCA algorithms from the MDP toolking (mdp and NIPALS)
            do not permit deactivating data centering.
            Therefore, the algorithm will proceed to center the data.
            """)
            center = True
        if algorithm == 'mlpca':
            if normalize_poissonian_noise is True:
                messages.warning(
                    "It makes no sense to do normalize_poissonian_noise with "
                    "the MLPCA algorithm. Therefore, "
                    "normalize_poissonian_noise is set to False")
                normalize_poissonian_noise = False
            if output_dimension is None:
                messages.warning_exit(
                    "With the mlpca algorithm the output_dimension must be "
                    "expecified")

        if center is True and normalize_poissonian_noise is True:
            messages.warning(
                "Centering is not compatible with poissonian noise "
                "normalization\n"
                "Disabling centering")
            center = False

        if variance2one is True and normalize_poissonian_noise is True:
            messages.warning(
                "Variance normalization is not compatible with poissonian "
                "noise normalization.\n"
                "Disabling variance2one")
            variance2one = False

        # Apply pre-treatments
        # Centering
        if center is True:
            self.energy_center()
        # Variance normalization
        if variance2one is True:
            self.variance2one()
        # Transform the data in a line spectrum
        self._unfolded4pca = self.unfold_if_multidim()
        # Normalize the poissonian noise
        # Note that this function can change the masks
        if normalize_poissonian_noise is True:
            navigation_mask, signal_mask = self.normalize_poissonian_noise(
                navigation_mask=navigation_mask,
                signal_mask=signal_mask,
                return_masks=True)

        navigation_mask = self._correct_navigation_mask_when_unfolded(
            navigation_mask)

        messages.information('Performing principal components analysis')

        if on_peaks:
            dc = self.peak_chars
        else:
            # The data must be transposed both for Images and Spectra
            dc = self.data.T.squeeze()
        # set the output target (peak results or not?)
        target = self._get_target(on_peaks)
        # Transform the None masks in slices to get the right behaviour
        if navigation_mask is None:
            navigation_mask = slice(None)
        if signal_mask is None:
            signal_mask = slice(None)

        if algorithm in ('mdp', 'NIPALS'):
            if algorithm == 'mdp':
                target.pca_node = mdp.nodes.PCANode(
                    output_dim=output_dimension, svd=True)
            else:
                target.pca_node = mdp.nodes.NIPALSNode(
                    output_dim=output_dimension)
            # Train the node
            print("\nPerforming the PCA node training")
            print("This include variance normalizing")
            target.pca_node.train(dc[signal_mask, :][:, navigation_mask])
            print("Performing PCA projection")
            pc = target.pca_node.execute(dc[:, navigation_mask])
            pca_v = target.pca_node.v
            pca_V = target.pca_node.d
            target.output_dimension = output_dimension

        elif algorithm == 'svd':
            pca_v, pca_V = pca(dc[signal_mask, :][:, navigation_mask])
            pc = np.dot(dc[:, navigation_mask], pca_v)

        elif algorithm == 'fast_svd':
            pca_v, pca_V = pca(dc[signal_mask, :][:, navigation_mask],
                               fast=True,
                               output_dimension=output_dimension)
            pc = np.dot(dc[:, navigation_mask], pca_v)

        else:
            # 'mlpca' or 'fast_mlpca'
            print("Performing the MLPCA training")
            if output_dimension is None:
                messages.warning_exit(
                    "For MLPCA it is mandatory to define the output_dimension")
            if var_array is None and var_func is None:
                messages.information('No variance array provided. '
                                     'Supposing poissonian data')
                var_array = dc.squeeze()[signal_mask, :][:, navigation_mask]

            if var_array is not None and var_func is not None:
                messages.warning_exit(
                    "You have defined both the var_func and var_array "
                    "keywords. Please, define just one of them")
            if var_func is not None:
                if hasattr(var_func, '__call__'):
                    var_array = var_func(
                        dc[signal_mask, ...][:, navigation_mask])
                else:
                    # The documented way to pass the coefficients is
                    # var_func; an explicit polyfit still wins so that
                    # existing callers keep working.
                    coefficients = var_func if polyfit is None else polyfit
                    try:
                        # Mask one axis at a time, as in every other branch:
                        # dc[signal_mask, navigation_mask] would pair the
                        # two masks element-wise when both are arrays.
                        var_array = np.polyval(
                            coefficients,
                            dc[signal_mask, :][:, navigation_mask])
                    except Exception:
                        messages.warning_exit(
                            'var_func must be either a function or an array '
                            'defining the coefficients of a polynom')
            fast = (algorithm == 'fast_mlpca')
            target.mlpca_output = mlpca(
                dc.squeeze()[signal_mask, :][:, navigation_mask],
                var_array.squeeze(), output_dimension, fast=fast)
            U, S, V, Sobj, ErrFlag = target.mlpca_output
            print("Performing PCA projection")
            pc = np.dot(dc[:, navigation_mask], V)
            pca_v = V
            pca_V = S ** 2

        if output_dimension:
            print("trimming to %i dimensions" % output_dimension)
            pca_v = pca_v[:, :output_dimension]
            pca_V = pca_V[:output_dimension]
            pc = pc[:, :output_dimension]

        target.pc = pc
        target.v = pca_v
        target.V = pca_V
        target.pca_algorithm = algorithm
        target.centered = center
        target.poissonian_noise_normalized = normalize_poissonian_noise
        target.output_dimension = output_dimension
        target.unfolded = self._unfolded4pca
        target.variance2one = variance2one

        if self._unfolded4pca is True:
            target.original_shape = self._shape_before_unfolding

        # Rescale the results if the noise was normalized
        if normalize_poissonian_noise is True:
            target.pc[signal_mask, :] *= self._root_bH
            target.v *= self._root_aG.T
            if isinstance(navigation_mask, slice):
                navigation_mask = None
            if isinstance(signal_mask, slice):
                signal_mask = None

        # undo any pre-treatments
        self.undo_treatments(on_peaks)

        # Set the pixels that were not processed to nan. This only applies
        # when a real mask array was used: None and slice(None) both mean
        # that every pixel was processed.
        if navigation_mask is not None and \
                not isinstance(navigation_mask, slice):
            v = np.zeros((dc.shape[1], target.v.shape[1]),
                         dtype=target.v.dtype)
            v[navigation_mask == False, :] = np.nan
            v[navigation_mask, :] = target.v
            target.v = v

        if self._unfolded4pca is True:
            self.fold()
            # Reset the flag (the original compared with `is` instead of
            # assigning, so the flag was never cleared).
            self._unfolded4pca = False
Esempio n. 19
0
                                 ripple, tiff)
# Readers that are always available.
io_plugins = [msa, digital_micrograph, fei, mrc, ripple, tiff]

# Optional readers: each one is registered only when its module imports.

# NetCDF is obsolete and is only provided for users who have old EELSLab
# files, so nothing is reported when it is not available.
try:
    from hyperspy.io_plugins import netcdf
except ImportError:
    pass
else:
    io_plugins.append(netcdf)

try:
    from hyperspy.io_plugins import hdf5
except ImportError:
    messages.warning('The HDF5 IO features are not available. '
                     'It is highly reccomended to install h5py')
else:
    io_plugins.append(hdf5)

try:
    from hyperspy.io_plugins import image
except ImportError:
    messages.information('The Image (PIL) IO features are not available')
else:
    io_plugins.append(image)

# Collect the default file extension of every plugin that can write.
default_write_ext = set()
for plugin in io_plugins:
    if not plugin.writes:
        continue
    default_write_ext.add(plugin.file_extensions[plugin.default_extension])
Esempio n. 20
0
    def principal_components_analysis(self,
                                      normalize_poissonian_noise=False,
                                      algorithm='svd',
                                      output_dimension=None,
                                      navigation_mask=None,
                                      signal_mask=None,
                                      center=False,
                                      variance2one=False,
                                      var_array=None,
                                      var_func=None,
                                      polyfit=None,
                                      on_peaks=False):
        """Principal components analysis.

        The results are stored as attributes (``pc``, ``v``, ``V``, ...) of
        the object returned by ``self._get_target(on_peaks)`` (the original
        documentation names it ``self.mva_results``).

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise
        algorithm : {'svd', 'fast_svd', 'mlpca', 'fast_mlpca', 'mdp', 'NIPALS'}
        output_dimension : None or int
            Number of principal components to keep. Mandatory for the
            'mlpca' and 'fast_mlpca' algorithms.
        navigation_mask : boolean numpy array
            Entries that are False are not processed and are set to NaN in
            the stored ``v``.
        signal_mask : boolean numpy array
        center : bool
            Perform energy centering before PCA
        variance2one : bool
            Perform whitening before PCA
        var_array : numpy array
            Array of variance for the maximum likelihood PCA algorithm
        var_func : function or numpy array
            If function, it will be applied to the dataset to obtain the
            var_array. Alternatively, it can be an array with the
            coefficients of a polynomial.
        polyfit : None or numpy array
            Polynomial coefficients handed to ``np.polyval`` when var_func is
            not callable. Kept for backward compatibility; when None, the
            coefficients are taken from var_func.
        on_peaks : bool
            If True, the analysis runs on ``self.peak_chars`` instead of
            ``self.data``.

        Raises
        ------
        ValueError
            If ``algorithm`` is not one of the supported names.

        See also
        --------
        plot_principal_components, plot_principal_components_maps, plot_lev

        """
        # Reject unknown algorithms before touching the data. Otherwise the
        # failure only shows up as a NameError after the data has already
        # been pre-treated and unfolded.
        known_algorithms = ('svd', 'fast_svd', 'mlpca', 'fast_mlpca',
                            'mdp', 'NIPALS')
        if algorithm not in known_algorithms:
            raise ValueError(
                "Unknown PCA algorithm %r, it must be one of: %s"
                % (algorithm, ', '.join(known_algorithms)))

        # backup the original data
        if on_peaks:
            self._data_before_treatments = self.peak_chars.copy()
        else:
            self._data_before_treatments = self.data.copy()
        # Check for conflicting options and correct them when possible
        if algorithm in ('mdp', 'NIPALS') and center is False:
            print("""
            The PCA algorithms from the MDP toolking (mdp and NIPALS)
            do not permit deactivating data centering.
            Therefore, the algorithm will proceed to center the data.
            """)
            center = True
        if algorithm == 'mlpca':
            if normalize_poissonian_noise is True:
                messages.warning(
                    "It makes no sense to do normalize_poissonian_noise with "
                    "the MLPCA algorithm. Therefore, "
                    "normalize_poissonian_noise is set to False")
                normalize_poissonian_noise = False
            if output_dimension is None:
                messages.warning_exit(
                    "With the mlpca algorithm the output_dimension must be "
                    "expecified")

        if center is True and normalize_poissonian_noise is True:
            messages.warning(
                "Centering is not compatible with poissonian noise "
                "normalization\n"
                "Disabling centering")
            center = False

        if variance2one is True and normalize_poissonian_noise is True:
            messages.warning(
                "Variance normalization is not compatible with poissonian "
                "noise normalization.\n"
                "Disabling variance2one")
            variance2one = False

        # Apply pre-treatments
        # Centering
        if center is True:
            self.energy_center()
        # Variance normalization
        if variance2one is True:
            self.variance2one()
        # Transform the data in a line spectrum
        self._unfolded4pca = self.unfold_if_multidim()
        # Normalize the poissonian noise
        # Note that this function can change the masks
        if normalize_poissonian_noise is True:
            navigation_mask, signal_mask = self.normalize_poissonian_noise(
                navigation_mask=navigation_mask,
                signal_mask=signal_mask,
                return_masks=True)

        navigation_mask = self._correct_navigation_mask_when_unfolded(
            navigation_mask)

        messages.information('Performing principal components analysis')

        if on_peaks:
            dc = self.peak_chars
        else:
            # The data must be transposed both for Images and Spectra
            dc = self.data.T.squeeze()
        # set the output target (peak results or not?)
        target = self._get_target(on_peaks)
        # Transform the None masks in slices to get the right behaviour
        if navigation_mask is None:
            navigation_mask = slice(None)
        if signal_mask is None:
            signal_mask = slice(None)

        if algorithm in ('mdp', 'NIPALS'):
            if algorithm == 'mdp':
                target.pca_node = mdp.nodes.PCANode(
                    output_dim=output_dimension, svd=True)
            else:
                target.pca_node = mdp.nodes.NIPALSNode(
                    output_dim=output_dimension)
            # Train the node
            print("\nPerforming the PCA node training")
            print("This include variance normalizing")
            target.pca_node.train(dc[signal_mask, :][:, navigation_mask])
            print("Performing PCA projection")
            pc = target.pca_node.execute(dc[:, navigation_mask])
            pca_v = target.pca_node.v
            pca_V = target.pca_node.d
            target.output_dimension = output_dimension

        elif algorithm == 'svd':
            pca_v, pca_V = pca(dc[signal_mask, :][:, navigation_mask])
            pc = np.dot(dc[:, navigation_mask], pca_v)

        elif algorithm == 'fast_svd':
            pca_v, pca_V = pca(dc[signal_mask, :][:, navigation_mask],
                               fast=True,
                               output_dimension=output_dimension)
            pc = np.dot(dc[:, navigation_mask], pca_v)

        else:
            # 'mlpca' or 'fast_mlpca'
            print("Performing the MLPCA training")
            if output_dimension is None:
                messages.warning_exit(
                    "For MLPCA it is mandatory to define the output_dimension")
            if var_array is None and var_func is None:
                messages.information('No variance array provided. '
                                     'Supposing poissonian data')
                var_array = dc.squeeze()[signal_mask, :][:, navigation_mask]

            if var_array is not None and var_func is not None:
                messages.warning_exit(
                    "You have defined both the var_func and var_array "
                    "keywords. Please, define just one of them")
            if var_func is not None:
                if hasattr(var_func, '__call__'):
                    var_array = var_func(
                        dc[signal_mask, ...][:, navigation_mask])
                else:
                    # The documented way to pass the coefficients is
                    # var_func; an explicit polyfit still wins so that
                    # existing callers keep working.
                    coefficients = var_func if polyfit is None else polyfit
                    try:
                        # Mask one axis at a time, as in every other branch:
                        # dc[signal_mask, navigation_mask] would pair the
                        # two masks element-wise when both are arrays.
                        var_array = np.polyval(
                            coefficients,
                            dc[signal_mask, :][:, navigation_mask])
                    except Exception:
                        messages.warning_exit(
                            'var_func must be either a function or an array '
                            'defining the coefficients of a polynom')
            fast = (algorithm == 'fast_mlpca')
            target.mlpca_output = mlpca(
                dc.squeeze()[signal_mask, :][:, navigation_mask],
                var_array.squeeze(),
                output_dimension,
                fast=fast)
            U, S, V, Sobj, ErrFlag = target.mlpca_output
            print("Performing PCA projection")
            pc = np.dot(dc[:, navigation_mask], V)
            pca_v = V
            pca_V = S**2

        if output_dimension:
            print("trimming to %i dimensions" % output_dimension)
            pca_v = pca_v[:, :output_dimension]
            pca_V = pca_V[:output_dimension]
            pc = pc[:, :output_dimension]

        target.pc = pc
        target.v = pca_v
        target.V = pca_V
        target.pca_algorithm = algorithm
        target.centered = center
        target.poissonian_noise_normalized = normalize_poissonian_noise
        target.output_dimension = output_dimension
        target.unfolded = self._unfolded4pca
        target.variance2one = variance2one

        if self._unfolded4pca is True:
            target.original_shape = self._shape_before_unfolding

        # Rescale the results if the noise was normalized
        if normalize_poissonian_noise is True:
            target.pc[signal_mask, :] *= self._root_bH
            target.v *= self._root_aG.T
            if isinstance(navigation_mask, slice):
                navigation_mask = None
            if isinstance(signal_mask, slice):
                signal_mask = None

        # undo any pre-treatments
        self.undo_treatments(on_peaks)

        # Set the pixels that were not processed to nan. This only applies
        # when a real mask array was used: None and slice(None) both mean
        # that every pixel was processed.
        if navigation_mask is not None and not isinstance(
                navigation_mask, slice):
            v = np.zeros((dc.shape[1], target.v.shape[1]),
                         dtype=target.v.dtype)
            v[navigation_mask == False, :] = np.nan
            v[navigation_mask, :] = target.v
            target.v = v

        if self._unfolded4pca is True:
            self.fold()
            # Reset the flag (the original compared with `is` instead of
            # assigning, so the flag was never cleared).
            self._unfolded4pca = False