Example #1
0
    def blind_source_separation(self,
                                number_of_components=None,
                                algorithm='sklearn_fastica',
                                diff_order=1,
                                factors=None,
                                comp_list=None,
                                mask=None,
                                on_loadings=False,
                                pretreatment=None,
                                **kwargs):
        """Blind source separation (BSS) on the result of a decomposition.

        The selected components are optionally pre-treated (smoothed,
        filtered or differentiated), masked, centered and whitened, and
        then passed to the chosen BSS algorithm. The resulting unmixing
        matrix is stored in ``self.learning_results.unmixing_matrix`` and
        the algorithm name in ``self.learning_results.bss_algorithm``.

        Parameters
        ----------
        number_of_components : int or None
            Number of leading components to pass to the BSS algorithm. If
            None, ``learning_results.output_dimension`` is used when it is
            not None.
        algorithm : str
            ``'orthomax'``, ``'sklearn_fastica'`` or the prefix of an MDP
            node class name: any other value is resolved as
            ``mdp.nodes.<algorithm>Node`` (e.g. FastICA, JADE, CuBICA,
            TDSEP).
        diff_order : int
            Sometimes it is convenient to perform the BSS on the derivative
            of the signal. If `diff_order` is 0, the signal is not
            differentiated. When `pretreatment` is given, `diff_order` is
            only forwarded to the 'savitzky_golay' and 'tv' smoothers.
        factors : numpy.array
            Factors to decompose, one component per column. If None, the
            factors (or the loadings, see `on_loadings`) of the previous
            decomposition are used.
        comp_list : sequence of int
            Indices of extra components to use. They are selected in
            addition to the leading components chosen through
            `number_of_components`, which permits choosing non contiguous
            components.
        mask : numpy boolean array
            If not None, the locations marked as True (masked) are not
            passed to the BSS algorithm.
        on_loadings : bool
            If True, perform the BSS on the loadings of a previous
            decomposition. If False, performs it on the factors.
        pretreatment : dict
            Optional pre-processing description. The ``'algorithm'`` key
            selects it: ``'savitzky_golay'`` (also reads
            ``'number_of_points'`` and ``'polynomial_order'``), ``'tv'``
            (also reads ``'smoothing_parameter'``) or ``'butter'`` (also
            reads ``'order'``, ``'cutoff'`` and ``'type'``).
        **kwargs : extra key word arguments
            Any keyword arguments are passed to the BSS algorithm. For
            ``'sklearn_fastica'`` a default ``tol=1e-10`` is added when not
            given.

        Raises
        ------
        AttributeError
            If ``learning_results`` holds no factors, i.e. no decomposition
            is available.
        ImportError
            If an MDP algorithm is requested and MDP is not installed.

        """
        target = self.learning_results
        # `is None` instead of `== None`: comparing a numpy array with `==`
        # is element-wise and its truth value is ambiguous.
        if getattr(target, 'factors', None) is None:
            raise AttributeError(
                'A decomposition must be performed before blind '
                'source seperation or factors must be provided.')

        if factors is None:
            factors = target.loadings if on_loadings else target.factors

        # One flag per component. Components are the columns of `factors`,
        # so the selector must have the length of axis 1.
        bool_index = np.zeros(factors.shape[1], dtype=bool)
        if number_of_components is None:
            number_of_components = target.output_dimension
        if number_of_components is not None:
            bool_index[:number_of_components] = True
        if comp_list is not None:
            for ifactors in comp_list:
                bool_index[ifactors] = True
        factors = factors[:, bool_index]

        if pretreatment is not None:
            from hyperspy._signals.spectrum import Spectrum
            sfactors = Spectrum(factors.T)
            pre_algorithm = pretreatment['algorithm']
            if pre_algorithm == 'savitzky_golay':
                sfactors.smooth_savitzky_golay(
                    number_of_points=pretreatment['number_of_points'],
                    polynomial_order=pretreatment['polynomial_order'],
                    differential_order=diff_order)
            if pre_algorithm == 'tv':
                sfactors.smooth_tv(
                    smoothing_parameter=pretreatment['smoothing_parameter'],
                    differential_order=diff_order)
            factors = sfactors.data.T
            if pre_algorithm == 'butter':
                b, a = sp.signal.butter(pretreatment['order'],
                                        pretreatment['cutoff'],
                                        pretreatment['type'])
                # Filter every component independently, in place.
                for i in range(factors.shape[1]):
                    factors[:, i] = sp.signal.filtfilt(b, a, factors[:, i])
        elif diff_order > 0:
            factors = np.diff(factors, diff_order, axis=0)

        if mask is not None:
            factors = factors[~mask]

        # first center and scale the data
        factors, invsqcovmat = centering_and_whitening(factors)

        if algorithm == 'orthomax':
            _, unmixing_matrix = orthomax(factors, **kwargs)
            unmixing_matrix = unmixing_matrix.T
        elif algorithm == 'sklearn_fastica':
            kwargs.setdefault('tol', 1e-10)
            target.bss_node = FastICA(**kwargs)
            # The data was whitened above.
            target.bss_node.whiten = False
            target.bss_node.fit(factors)
            try:
                unmixing_matrix = target.bss_node.unmixing_matrix_
            except AttributeError:
                # unmixing_matrix_ was renamed to components_
                unmixing_matrix = target.bss_node.components_
        else:
            if mdp_installed is False:
                raise ImportError(
                    'MDP is not installed. Nothing done')
            # Look the node class up by name instead of building and
            # exec-ing source code: the keyword values are passed as real
            # objects and no arbitrary code is evaluated.
            node_class = getattr(mdp.nodes, '%sNode' % algorithm)
            target.bss_node = node_class(**kwargs)
            target.bss_node.train(factors)
            unmixing_matrix = target.bss_node.get_recmatrix()

        # Fold the whitening transform into the unmixing matrix.
        target.unmixing_matrix = np.dot(unmixing_matrix, invsqcovmat)
        self._unmix_factors(target)
        self._unmix_loadings(target)
        self._auto_reverse_bss_component(target)
        target.bss_algorithm = algorithm
Example #2
0
    def blind_source_separation(self,
                                number_of_components=None,
                                algorithm='sklearn_fastica',
                                diff_order=1,
                                diff_axes=None,
                                factors=None,
                                comp_list=None,
                                mask=None,
                                on_loadings=False,
                                pretreatment=None,
                                **kwargs):
        """Blind source separation (BSS) on the result of a decomposition.

        The selected components are optionally differentiated, masked,
        unfolded, centered and whitened before being passed to the chosen
        BSS algorithm. The resulting unmixing matrix is stored in
        ``self.learning_results.unmixing_matrix``, together with
        ``on_loadings`` and ``bss_algorithm``.

        Parameters
        ----------
        number_of_components : int
            number of principal components to pass to the BSS algorithm.
            When given, it takes precedence over `comp_list`.
        algorithm : str
            ``'orthomax'``, ``'sklearn_fastica'`` or the prefix of an MDP
            node class name: any other value is resolved as
            ``mdp.nodes.<algorithm>Node`` (e.g. FastICA, JADE, CuBICA,
            TDSEP).
        diff_order : int
            Sometimes it is convenient to perform the BSS on the derivative of
            the signal. If diff_order is 0, the signal is not differentiated.
        diff_axes : None or list of ints or strings
            If None, when `diff_order` is greater than 0 and `signal_dimension`
            (`navigation_dimension`) when `on_loadings` is False (True) is
            greater than 1, the differences are calculated across all
            signal (navigation) axes. Otherwise the axes can be specified in
            a list.
        factors : Signal or numpy array.
            Factors to decompose. If None, the BSS is performed on the
            factors of a previous decomposition. If a Signal instance the
            navigation dimension must be 1 and the size greater than 1. If a
            numpy array (deprecated) the factors are stored in a 2d array
            stacked over the last axis.
        comp_list : sequence of int
            Indices of the components to use. It permits to choose non
            contiguous components. Only used when `number_of_components`
            is None.
        mask : bool numpy array or Signal instance.
            If not None, the signal locations marked as True are masked. The
            mask shape must be equal to the signal shape
            (navigation shape) when `on_loadings` is False (True). A Signal
            mask is copied before use, so the caller's object is left
            untouched.
        on_loadings : bool
            If True, perform the BSS on the loadings of a previous
            decomposition. If False, performs it on the factors.
        pretreatment: dict
            Accepted for backwards compatibility; this implementation does
            not use it.
        **kwargs : extra key word arguments
            Any keyword arguments are passed to the BSS algorithm. For
            ``'sklearn_fastica'`` a default ``tol=1e-10`` is added when not
            given.

        Raises
        ------
        AttributeError
            If `factors` is None and no decomposition results are available.
        ValueError
            If `factors` or `mask` have an unsupported type or shape, or if
            the components to use cannot be determined.
        ImportError
            If the package required by `algorithm` is not installed.

        """
        import copy

        from hyperspy.signal import Signal
        from hyperspy._signals.spectrum import Spectrum

        lr = self.learning_results

        if factors is None:
            if not hasattr(lr, 'factors') or lr.factors is None:
                raise AttributeError(
                    'A decomposition must be performed before blind '
                    'source seperation or factors must be provided.')

            else:
                if on_loadings:
                    factors = self.get_decomposition_loadings()
                else:
                    factors = self.get_decomposition_factors()

        # Check factors
        if not isinstance(factors, Signal):
            if isinstance(factors, np.ndarray):
                warnings.warn(
                    "factors as numpy arrays will raise an error in "
                    "HyperSpy 0.9 and newer. From them on only passing "
                    "factors as HyperSpy Signal instances will be "
                    "supported.",
                    DeprecationWarning)
                # We proceed supposing that the factors are spectra stacked
                # over the last dimension to reproduce the deprecated
                # behaviour.
                # TODO: Don't forget to change `factors` docstring when
                # removing this.
                factors = Spectrum(factors.T)
            else:
                # Change next error message when removing the
                # DeprecationWarning
                raise ValueError(
                    "`factors` must be either a Signal instance or a "
                    "numpy array but an object of type %s was provided." %
                    type(factors))

        # Check factor dimensions
        if factors.axes_manager.navigation_dimension != 1:
            raise ValueError("`factors` must have navigation dimension "
                             "equal one, but the navigation dimension "
                             "of the given factors is %i." %
                             factors.axes_manager.navigation_dimension
                             )
        elif factors.axes_manager.navigation_size < 2:
            raise ValueError("`factors` must have navigation size "
                             "greater than one, but the navigation "
                             "size of the given factors is %i." %
                             factors.axes_manager.navigation_size)

        # Check mask dimensions
        if mask is not None:
            ref_shape, space = (factors.axes_manager.signal_shape,
                                "navigation" if on_loadings else "signal")
            if isinstance(mask, np.ndarray):
                warnings.warn(
                    "Bare numpy array masks are deprecated and will be removed"
                    " in next HyperSpy 0.9.",
                    DeprecationWarning)
                # Array shapes are compared in reversed order with respect
                # to the axes manager shape.
                ref_shape = ref_shape[::-1]
                if mask.shape != ref_shape:
                    raise ValueError(
                        "The `mask` shape is not equal to the %s shape. "
                        "Mask shape: %s\tSignal shape in array: %s" %
                        (space, str(mask.shape), str(ref_shape)))
                else:
                    if on_loadings:
                        mask = self._get_navigation_signal(data=mask)
                    else:
                        mask = self._get_signal_signal(data=mask)
            elif isinstance(mask, Signal):
                if mask.axes_manager.signal_shape != ref_shape:
                    raise ValueError(
                        "The `mask` signal shape is not equal to the %s shape."
                        " Mask shape: %s\t%s shape:%s" %
                        (space,
                         str(mask.axes_manager.signal_shape),
                         space,
                         str(ref_shape)))
                # The mask dtype and data are modified and the mask is
                # unfolded below. Work on a private copy so the object
                # passed by the caller is not altered.
                mask = copy.deepcopy(mask)
            else:
                raise ValueError(
                    "`mask` must be either a Signal instance or a "
                    "numpy array but an object of type %s was provided." %
                    type(mask))

        # Note that we don't check the factor's signal dimension. This is on
        # purpose as an user may like to apply pretreaments that change their
        # dimensionality.

        # The diff_axes are given for the main signal. We need to compute
        # the correct diff_axes for the factors.
        # Get diff_axes index in axes manager
        if diff_axes is not None:
            diff_axes = [1 + axis.index_in_axes_manager for axis in
                         [self.axes_manager[axis] for axis in diff_axes]]
            if not on_loadings:
                diff_axes = [index - self.axes_manager.navigation_dimension
                             for index in diff_axes]
        # Select components to separate
        if number_of_components is not None:
            comp_list = range(number_of_components)
        elif comp_list is not None:
            number_of_components = len(comp_list)
        else:
            if lr.output_dimension is not None:
                number_of_components = lr.output_dimension
                comp_list = range(number_of_components)
            else:
                raise ValueError(
                    "No `number_of_components` or `comp_list` provided.")
        factors = stack([factors.inav[i] for i in comp_list])

        # Apply differences pre-processing if requested.
        if diff_order > 0:
            factors = get_derivative(factors,
                                     diff_axes=diff_axes,
                                     diff_order=diff_order)
            if mask is not None:
                # The following is a little trick to dilate the mask as
                # required when operating on the differences. It exploits
                # the fact that np.diff automatically "dilates" nans. The
                # trick has a memory penalty which should be low compared
                # to the total memory required for the core application in
                # most cases.
                mask_diff_axes = (
                    [iaxis - 1 for iaxis in diff_axes]
                    if diff_axes is not None
                    else None)
                mask.change_dtype("float")
                mask.data[mask.data == 1] = np.nan
                mask = get_derivative(mask,
                                      diff_axes=mask_diff_axes,
                                      diff_order=diff_order)
                mask.data[np.isnan(mask.data)] = 1
                mask.change_dtype("bool")

        # Unfold in case the signal_dimension > 1
        factors.unfold()
        if mask is not None:
            mask.unfold()
            factors = factors.data.T[~mask.data]
        else:
            factors = factors.data.T

        # Center and scale the data
        factors, invsqcovmat = centering_and_whitening(factors)

        # Perform actual BSS
        if algorithm == 'orthomax':
            _, unmixing_matrix = orthomax(factors, **kwargs)
            unmixing_matrix = unmixing_matrix.T

        elif algorithm == 'sklearn_fastica':
            if not import_sklearn.sklearn_installed:
                raise ImportError(
                    "The optional package scikit learn is not installed "
                    "and it is required for this feature.")
            if 'tol' not in kwargs:
                kwargs['tol'] = 1e-10
            lr.bss_node = import_sklearn.FastICA(
                **kwargs)
            # The data was whitened above.
            lr.bss_node.whiten = False
            lr.bss_node.fit(factors)
            try:
                unmixing_matrix = lr.bss_node.unmixing_matrix_
            except AttributeError:
                # unmixing_matrix was renamed to components
                unmixing_matrix = lr.bss_node.components_
        else:
            if mdp_installed is False:
                raise ImportError(
                    'MDP is not installed. Nothing done')
            temp_function = getattr(mdp.nodes, algorithm + "Node")
            lr.bss_node = temp_function(**kwargs)
            lr.bss_node.train(factors)
            unmixing_matrix = lr.bss_node.get_recmatrix()
        # Fold the whitening transform into the unmixing matrix.
        w = np.dot(unmixing_matrix, invsqcovmat)
        if lr.explained_variance is not None:
            # The output of ICA is not sorted in any way what makes it
            # difficult to compare results from different unmixings. The
            # following code is an experimental attempt to sort them in a
            # more predictable way
            sorting_indices = np.argsort(np.dot(
                lr.explained_variance[:number_of_components],
                np.abs(w.T)))[::-1]
            w[:] = w[sorting_indices, :]
        lr.unmixing_matrix = w
        lr.on_loadings = on_loadings
        self._unmix_components()
        self._auto_reverse_bss_component(lr)
        lr.bss_algorithm = algorithm
Example #3
0
    def blind_source_separation(self,
                                number_of_components=None,
                                algorithm='sklearn_fastica',
                                diff_order=1,
                                diff_axes=None,
                                factors=None,
                                comp_list=None,
                                mask=None,
                                on_loadings=False,
                                pretreatment=None,
                                **kwargs):
        """Blind source separation (BSS) on the result of a decomposition.

        Pipeline: pick the components, optionally differentiate them (and
        dilate the mask accordingly), unfold, drop the masked locations,
        center and whiten, and run the requested BSS algorithm. The
        unmixing matrix, the `on_loadings` flag and the algorithm name are
        stored on ``self.learning_results``.

        Parameters
        ----------
        number_of_components : int
            number of principal components to pass to the BSS algorithm.
            If given, `comp_list` is ignored.
        algorithm : str
            Either ``'orthomax'``, ``'sklearn_fastica'`` or the name of an
            MDP node without its ``Node`` suffix (e.g. FastICA, JADE,
            CuBICA, TDSEP); the latter is looked up in ``mdp.nodes``.
        diff_order : int
            Sometimes it is convenient to perform the BSS on the derivative of
            the signal. If diff_order is 0, the signal is not differentiated.
        diff_axes : None or list of ints or strings
            If None, when `diff_order` is greater than 0 and `signal_dimension`
            (`navigation_dimension`) when `on_loadings` is False (True) is
            greater than 1, the differences are calculated across all
            signal (navigation) axes. Otherwise the axes can be specified in
            a list.
        factors : Signal or numpy array.
            Factors to decompose. If None, the BSS is performed on the
            factors of a previous decomposition. If a Signal instance the
            navigation dimension must be 1 and the size greater than 1. If a
            numpy array (deprecated) the factors are stored in a 2d array
            stacked over the last axis.
        comp_list : sequence of int
            Indices of the components to use, which permits choosing non
            contiguous components. Ignored when `number_of_components` is
            given.
        mask : bool numpy array or Signal instance.
            If not None, the signal locations marked as True are masked. The
            mask shape must be equal to the signal shape
            (navigation shape) when `on_loadings` is False (True). Signal
            masks are deep-copied internally and therefore never modified.
        on_loadings : bool
            If True, perform the BSS on the loadings of a previous
            decomposition. If False, performs it on the factors.
        pretreatment: dict
            Not used by this implementation; kept in the signature for
            backwards compatibility.
        **kwargs : extra key word arguments
            Any keyword arguments are passed to the BSS algorithm.
            ``tol=1e-10`` is added for ``'sklearn_fastica'`` when missing.

        Raises
        ------
        AttributeError
            When `factors` is None and there is no previous decomposition.
        ValueError
            When `factors` or `mask` are of an unsupported type or shape,
            or when neither `number_of_components`, `comp_list` nor
            ``learning_results.output_dimension`` is available.
        ImportError
            When the package backing `algorithm` is not installed.

        """
        import copy

        from hyperspy.signal import Signal
        from hyperspy._signals.spectrum import Spectrum

        lr = self.learning_results

        if factors is None:
            if not hasattr(lr, 'factors') or lr.factors is None:
                raise AttributeError(
                    'A decomposition must be performed before blind '
                    'source seperation or factors must be provided.')

            else:
                if on_loadings:
                    factors = self.get_decomposition_loadings()
                else:
                    factors = self.get_decomposition_factors()

        # Check factors
        if not isinstance(factors, Signal):
            if isinstance(factors, np.ndarray):
                warnings.warn(
                    "factors as numpy arrays will raise an error in "
                    "HyperSpy 0.9 and newer. From them on only passing "
                    "factors as HyperSpy Signal instances will be "
                    "supported.", DeprecationWarning)
                # We proceed supposing that the factors are spectra stacked
                # over the last dimension to reproduce the deprecated
                # behaviour.
                # TODO: Don't forget to change `factors` docstring when
                # removing this.
                factors = Spectrum(factors.T)
            else:
                # Change next error message when removing the
                # DeprecationWarning
                raise ValueError(
                    "`factors` must be either a Signal instance or a "
                    "numpy array but an object of type %s was provided." %
                    type(factors))

        # Check factor dimensions
        if factors.axes_manager.navigation_dimension != 1:
            raise ValueError("`factors` must have navigation dimension "
                             "equal one, but the navigation dimension "
                             "of the given factors is %i." %
                             factors.axes_manager.navigation_dimension)
        elif factors.axes_manager.navigation_size < 2:
            raise ValueError("`factors` must have navigation size "
                             "greater than one, but the navigation "
                             "size of the given factors is %i." %
                             factors.axes_manager.navigation_size)

        # Check mask type and dimensions
        if mask is not None:
            ref_shape, space = (factors.axes_manager.signal_shape,
                                "navigation" if on_loadings else "signal")
            if isinstance(mask, np.ndarray):
                warnings.warn(
                    "Bare numpy array masks are deprecated and will be removed"
                    " in next HyperSpy 0.9.", DeprecationWarning)
                # The array shape is checked against the reversed axes
                # manager shape.
                ref_shape = ref_shape[::-1]
                if mask.shape != ref_shape:
                    raise ValueError(
                        "The `mask` shape is not equal to the %s shape. "
                        "Mask shape: %s\tSignal shape in array: %s" %
                        (space, str(mask.shape), str(ref_shape)))
                else:
                    if on_loadings:
                        mask = self._get_navigation_signal(data=mask)
                    else:
                        mask = self._get_signal_signal(data=mask)
            elif isinstance(mask, Signal):
                if mask.axes_manager.signal_shape != ref_shape:
                    raise ValueError(
                        "The `mask` signal shape is not equal to the %s shape."
                        " Mask shape: %s\t%s shape:%s" %
                        (space, str(mask.axes_manager.signal_shape), space,
                         str(ref_shape)))
                # Further down the mask gets its dtype changed, its data
                # overwritten and is unfolded. Detach it from the caller's
                # object first so that none of this leaks out.
                mask = copy.deepcopy(mask)
            else:
                # Fail early with a clear message instead of crashing on a
                # missing Signal method later on.
                raise ValueError(
                    "`mask` must be either a Signal instance or a "
                    "numpy array but an object of type %s was provided." %
                    type(mask))

        # Note that we don't check the factor's signal dimension. This is on
        # purpose as an user may like to apply pretreaments that change their
        # dimensionality.

        # The diff_axes are given for the main signal. We need to compute
        # the correct diff_axes for the factors.
        # Get diff_axes index in axes manager
        if diff_axes is not None:
            diff_axes = [
                1 + axis.index_in_axes_manager
                for axis in [self.axes_manager[axis] for axis in diff_axes]
            ]
            if not on_loadings:
                diff_axes = [
                    index - self.axes_manager.navigation_dimension
                    for index in diff_axes
                ]
        # Select components to separate
        if number_of_components is not None:
            comp_list = range(number_of_components)
        elif comp_list is not None:
            number_of_components = len(comp_list)
        else:
            if lr.output_dimension is not None:
                number_of_components = lr.output_dimension
                comp_list = range(number_of_components)
            else:
                raise ValueError(
                    "No `number_of_components` or `comp_list` provided.")
        factors = stack([factors.inav[i] for i in comp_list])

        # Apply differences pre-processing if requested.
        if diff_order > 0:
            factors = get_derivative(factors,
                                     diff_axes=diff_axes,
                                     diff_order=diff_order)
            if mask is not None:
                # The following is a little trick to dilate the mask as
                # required when operating on the differences. It exploits
                # the fact that np.diff automatically "dilates" nans. The
                # trick has a memory penalty which should be low compared
                # to the total memory required for the core application in
                # most cases.
                mask_diff_axes = ([iaxis - 1 for iaxis in diff_axes]
                                  if diff_axes is not None else None)
                mask.change_dtype("float")
                mask.data[mask.data == 1] = np.nan
                mask = get_derivative(mask,
                                      diff_axes=mask_diff_axes,
                                      diff_order=diff_order)
                mask.data[np.isnan(mask.data)] = 1
                mask.change_dtype("bool")

        # Unfold in case the signal_dimension > 1
        factors.unfold()
        if mask is not None:
            mask.unfold()
            factors = factors.data.T[~mask.data]
        else:
            factors = factors.data.T

        # Center and scale the data
        factors, invsqcovmat = centering_and_whitening(factors)

        # Perform actual BSS
        if algorithm == 'orthomax':
            _, unmixing_matrix = orthomax(factors, **kwargs)
            unmixing_matrix = unmixing_matrix.T

        elif algorithm == 'sklearn_fastica':
            if not import_sklearn.sklearn_installed:
                raise ImportError(
                    "The optional package scikit learn is not installed "
                    "and it is required for this feature.")
            if 'tol' not in kwargs:
                kwargs['tol'] = 1e-10
            lr.bss_node = import_sklearn.FastICA(**kwargs)
            # Whitening was already done above.
            lr.bss_node.whiten = False
            lr.bss_node.fit(factors)
            try:
                unmixing_matrix = lr.bss_node.unmixing_matrix_
            except AttributeError:
                # unmixing_matrix was renamed to components
                unmixing_matrix = lr.bss_node.components_
        else:
            if mdp_installed is False:
                raise ImportError('MDP is not installed. Nothing done')
            temp_function = getattr(mdp.nodes, algorithm + "Node")
            lr.bss_node = temp_function(**kwargs)
            lr.bss_node.train(factors)
            unmixing_matrix = lr.bss_node.get_recmatrix()
        # Combine the BSS unmixing with the whitening transform.
        w = np.dot(unmixing_matrix, invsqcovmat)
        if lr.explained_variance is not None:
            # The output of ICA is not sorted in any way what makes it
            # difficult to compare results from different unmixings. The
            # following code is an experimental attempt to sort them in a
            # more predictable way
            sorting_indices = np.argsort(
                np.dot(lr.explained_variance[:number_of_components],
                       np.abs(w.T)))[::-1]
            w[:] = w[sorting_indices, :]
        lr.unmixing_matrix = w
        lr.on_loadings = on_loadings
        self._unmix_components()
        self._auto_reverse_bss_component(lr)
        lr.bss_algorithm = algorithm