def blind_source_separation(self,
                            number_of_components=None,
                            algorithm='sklearn_fastica',
                            diff_order=1,
                            factors=None,
                            comp_list=None,
                            mask=None,
                            on_loadings=False,
                            pretreatment=None,
                            **kwargs):
    """Blind source separation (BSS) on the result of the decomposition.

    The selected components are optionally pre-treated or differentiated,
    masked, centred and whitened, and then passed to the chosen BSS
    algorithm. The resulting unmixing matrix is stored in
    ``self.learning_results.unmixing_matrix`` and the name of the algorithm
    in ``self.learning_results.bss_algorithm``.

    Parameters
    ----------
    number_of_components : int or None
        Number of leading components to pass to the BSS algorithm. If
        None, ``learning_results.output_dimension`` is used when it is
        not None.
    algorithm : str
        'orthomax', 'sklearn_fastica', or the prefix of an MDP node class
        name; any other value is resolved as
        ``mdp.nodes.<algorithm>Node`` (e.g. 'FastICA', 'JADE', 'CuBICA',
        'TDSEP').
    diff_order : int
        Sometimes it is convenient to perform the BSS on the derivative
        of the signal. If diff_order is 0, the signal is not
        differentiated. When `pretreatment` is given, `diff_order` is
        only forwarded to the 'savitzky_golay' and 'tv' smoothers.
    factors : numpy.array or None
        Factors to decompose, one component per column. If None, the BSS
        is performed on the factors (or loadings, see `on_loadings`) of a
        previous decomposition.
    comp_list : iterable of int or None
        Indices of components to use. It permits choosing non contiguous
        components. These indices are selected in addition to the leading
        `number_of_components` components.
    mask : boolean numpy array or None
        If not None, the locations marked as True (masked) are not
        passed to the BSS algorithm.
    on_loadings : bool
        If True, perform the BSS on the loadings of a previous
        decomposition. If False, perform it on the factors.
    pretreatment : dict or None
        Optional smoothing/filtering step. ``pretreatment['algorithm']``
        selects it: 'savitzky_golay' (keys 'number_of_points',
        'polynomial_order'), 'tv' (key 'smoothing_parameter') or 'butter'
        (keys 'order', 'cutoff', 'type').
    **kwargs : extra keyword arguments
        Any keyword arguments are passed to the BSS algorithm.

    Raises
    ------
    AttributeError
        If `factors` is None and no decomposition results are available.
    ValueError
        If no component ends up selected.
    ImportError
        If an MDP algorithm is requested and MDP is not installed.

    """
    target = self.learning_results

    if factors is None:
        # ``is None`` instead of ``== None``: comparing a numpy array with
        # ``==`` is elementwise and cannot be used as a condition.
        if getattr(target, 'factors', None) is None:
            raise AttributeError(
                'A decomposition must be performed before blind '
                'source seperation or factors must be provided.')
        factors = target.loadings if on_loadings else target.factors

    # One flag per component, i.e. per *column* of ``factors`` (the flags
    # are used below to index the second axis).
    bool_index = np.zeros(factors.shape[1], dtype=bool)
    if number_of_components is None:
        number_of_components = target.output_dimension
    if number_of_components is not None:
        bool_index[:number_of_components] = True
    if comp_list is not None:
        for index in comp_list:
            bool_index[index] = True
    if not bool_index.any():
        raise ValueError(
            "No `number_of_components` or `comp_list` provided.")
    factors = factors[:, bool_index]

    if pretreatment is not None:
        from hyperspy._signals.spectrum import Spectrum
        sfactors = Spectrum(factors.T)
        pre_algorithm = pretreatment['algorithm']
        if pre_algorithm == 'savitzky_golay':
            sfactors.smooth_savitzky_golay(
                number_of_points=pretreatment['number_of_points'],
                polynomial_order=pretreatment['polynomial_order'],
                differential_order=diff_order)
        elif pre_algorithm == 'tv':
            sfactors.smooth_tv(
                smoothing_parameter=pretreatment['smoothing_parameter'],
                differential_order=diff_order)
        factors = sfactors.data.T
        if pre_algorithm == 'butter':
            b, a = sp.signal.butter(pretreatment['order'],
                                    pretreatment['cutoff'],
                                    pretreatment['type'])
            # Filter every component (column) independently.
            for i in range(factors.shape[1]):
                factors[:, i] = sp.signal.filtfilt(b, a, factors[:, i])
    elif diff_order > 0:
        factors = np.diff(factors, diff_order, axis=0)

    if mask is not None:
        factors = factors[~mask]

    # First center and scale the data
    factors, invsqcovmat = centering_and_whitening(factors)

    if algorithm == 'orthomax':
        _, unmixing_matrix = orthomax(factors, **kwargs)
        unmixing_matrix = unmixing_matrix.T
    elif algorithm == 'sklearn_fastica':
        kwargs.setdefault('tol', 1e-10)
        target.bss_node = FastICA(**kwargs)
        # The data has already been whitened above.
        target.bss_node.whiten = False
        target.bss_node.fit(factors)
        unmixing_matrix = target.bss_node.unmixing_matrix_
    else:
        if mdp_installed is False:
            raise ImportError('MDP is not installed. Nothing done')
        # Resolve the node class by name instead of building source code
        # for ``exec``: keyword values are passed as real objects.
        node_class = getattr(mdp.nodes, '%sNode' % algorithm)
        target.bss_node = node_class(**kwargs)
        target.bss_node.train(factors)
        unmixing_matrix = target.bss_node.get_recmatrix()

    # Fold the whitening transform into the unmixing matrix.
    target.unmixing_matrix = np.dot(unmixing_matrix, invsqcovmat)
    self._unmix_factors(target)
    self._unmix_loadings(target)
    self._auto_reverse_bss_component(target)
    target.bss_algorithm = algorithm
def blind_source_separation(self,
                            number_of_components=None,
                            algorithm='sklearn_fastica',
                            diff_order=1,
                            diff_axes=None,
                            factors=None,
                            comp_list=None,
                            mask=None,
                            on_loadings=False,
                            pretreatment=None,
                            **kwargs):
    """Blind source separation (BSS) on the result of the decomposition.

    The selected components are optionally differentiated, masked,
    unfolded, centred and whitened, and then passed to the chosen BSS
    algorithm. The unmixing matrix, `on_loadings` and the algorithm name
    are stored in ``self.learning_results``.

    Parameters
    ----------
    number_of_components : int or None
        Number of leading components to pass to the BSS algorithm. When
        given it takes precedence over `comp_list`. If both are None,
        ``learning_results.output_dimension`` is used.
    algorithm : str
        'orthomax', 'sklearn_fastica', or the prefix of an MDP node class
        name; any other value is resolved as
        ``mdp.nodes.<algorithm>Node`` (e.g. 'FastICA', 'JADE', 'CuBICA',
        'TDSEP').
    diff_order : int
        Sometimes it is convenient to perform the BSS on the derivative
        of the signal. If diff_order is 0, the signal is not
        differentiated.
    diff_axes : None or list of ints or strings
        Axes of the main signal along which the differences are
        calculated when `diff_order` is greater than 0. They are
        translated to the corresponding axes of the factors (loadings).
        If None, the choice of axes is left to the derivative helper.
    factors : Signal or numpy array.
        Factors to decompose. If None, the BSS is performed on the
        factors of a previous decomposition. If a Signal instance the
        navigation dimension must be 1 and the size greater than 1. If a
        numpy array (deprecated) the factors are stored in a 2d array
        stacked over the last axis.
    comp_list : iterable of int or None
        Indices of the components to use. It permits choosing non
        contiguous components. Ignored when `number_of_components` is
        given.
    mask : bool numpy array or Signal instance.
        If not None, the signal locations marked as True are masked. The
        mask shape must be equal to the signal shape (navigation shape)
        when `on_loadings` is False (True). A Signal mask is copied
        before use, so the caller's object is left untouched.
    on_loadings : bool
        If True, perform the BSS on the loadings of a previous
        decomposition. If False, performs it on the factors.
    pretreatment : dict
        Not used by this implementation; accepted for backward
        compatibility.
    **kwargs : extra key word arguments
        Any keyword arguments are passed to the BSS algorithm.

    Raises
    ------
    AttributeError
        If `factors` is None and no decomposition results are available.
    ValueError
        If `factors` or `mask` have an unsupported type or shape, or if
        no components are selected.
    ImportError
        If the requested backend (scikit-learn or MDP) is not installed.

    """
    import copy

    from hyperspy.signal import Signal
    from hyperspy._signals.spectrum import Spectrum

    lr = self.learning_results

    if factors is None:
        if not hasattr(lr, 'factors') or lr.factors is None:
            raise AttributeError(
                'A decomposition must be performed before blind '
                'source seperation or factors must be provided.')
        if on_loadings:
            factors = self.get_decomposition_loadings()
        else:
            factors = self.get_decomposition_factors()

    # Check factors
    if not isinstance(factors, Signal):
        if isinstance(factors, np.ndarray):
            warnings.warn(
                "factors as numpy arrays will raise an error in "
                "HyperSpy 0.9 and newer. From them on only passing "
                "factors as HyperSpy Signal instances will be "
                "supported.",
                DeprecationWarning)
            # We proceed supposing that the factors are spectra stacked
            # over the last dimension to reproduce the deprecated
            # behaviour.
            # TODO: Don't forget to change `factors` docstring when
            # removing this.
            factors = Spectrum(factors.T)
        else:
            # Change next error message when removing the
            # DeprecationWarning
            raise ValueError(
                "`factors` must be either a Signal instance or a "
                "numpy array but an object of type %s was provided." %
                type(factors))

    # Check factor dimensions
    if factors.axes_manager.navigation_dimension != 1:
        raise ValueError("`factors` must have navigation dimension "
                         "equal one, but the navigation dimension "
                         "of the given factors is %i." %
                         factors.axes_manager.navigation_dimension)
    elif factors.axes_manager.navigation_size < 2:
        raise ValueError("`factors` must have navigation size "
                         "greater than one, but the navigation "
                         "size of the given factors is %i." %
                         factors.axes_manager.navigation_size)

    # Check mask dimensions
    if mask is not None:
        ref_shape, space = (factors.axes_manager.signal_shape,
                            "navigation" if on_loadings else "signal")
        if isinstance(mask, np.ndarray):
            warnings.warn(
                "Bare numpy array masks are deprecated and will be removed"
                " in next HyperSpy 0.9.",
                DeprecationWarning)
            # The array shape is compared against the reversed signal
            # shape.
            ref_shape = ref_shape[::-1]
            if mask.shape != ref_shape:
                raise ValueError(
                    "The `mask` shape is not equal to the %s shape. "
                    "Mask shape: %s\tSignal shape in array: %s" %
                    (space, str(mask.shape), str(ref_shape)))
            if on_loadings:
                mask = self._get_navigation_signal(data=mask)
            else:
                mask = self._get_signal_signal(data=mask)
        elif isinstance(mask, Signal):
            if mask.axes_manager.signal_shape != ref_shape:
                raise ValueError(
                    "The `mask` signal shape is not equal to the %s shape."
                    " Mask shape: %s\t%s shape:%s" %
                    (space,
                     str(mask.axes_manager.signal_shape),
                     space,
                     str(ref_shape)))
            # The mask is modified in place below (dtype change, NaN
            # marking, unfolding); work on a copy so that the caller's
            # mask is not altered.
            mask = copy.deepcopy(mask)

    # Note that we don't check the factor's signal dimension. This is on
    # purpose as an user may like to apply pretreaments that change their
    # dimensionality.

    # The diff_axes are given for the main signal. We need to compute
    # the correct diff_axes for the factors.
    # Get diff_axes index in axes manager
    if diff_axes is not None:
        diff_axes = [1 + self.axes_manager[axis].index_in_axes_manager
                     for axis in diff_axes]
        if not on_loadings:
            diff_axes = [index - self.axes_manager.navigation_dimension
                         for index in diff_axes]

    # Select components to separate
    if number_of_components is not None:
        comp_list = range(number_of_components)
    elif comp_list is not None:
        number_of_components = len(comp_list)
    elif lr.output_dimension is not None:
        number_of_components = lr.output_dimension
        comp_list = range(number_of_components)
    else:
        raise ValueError(
            "No `number_of_components` or `comp_list` provided.")
    factors = stack([factors.inav[i] for i in comp_list])

    # Apply differences pre-processing if requested.
    if diff_order > 0:
        factors = get_derivative(factors,
                                 diff_axes=diff_axes,
                                 diff_order=diff_order)
        if mask is not None:
            # The following is a little trick to dilate the mask as
            # required when operation on the differences. It exploits the
            # fact that np.diff automatically "dilates" nans. The trick has
            # a memory penalty which should be low compare to the total
            # memory required for the core application in most cases.
            mask_diff_axes = (
                [iaxis - 1 for iaxis in diff_axes]
                if diff_axes is not None else None)
            mask.change_dtype("float")
            mask.data[mask.data == 1] = np.nan
            mask = get_derivative(mask,
                                  diff_axes=mask_diff_axes,
                                  diff_order=diff_order)
            mask.data[np.isnan(mask.data)] = 1
            mask.change_dtype("bool")

    # Unfold in case the signal_dimension > 1
    factors.unfold()
    if mask is not None:
        mask.unfold()
        factors = factors.data.T[~mask.data]
    else:
        factors = factors.data.T

    # Center and scale the data
    factors, invsqcovmat = centering_and_whitening(factors)

    # Perform actual BSS
    if algorithm == 'orthomax':
        _, unmixing_matrix = orthomax(factors, **kwargs)
        unmixing_matrix = unmixing_matrix.T
    elif algorithm == 'sklearn_fastica':
        if not import_sklearn.sklearn_installed:
            raise ImportError(
                "The optional package scikit learn is not installed "
                "and it is required for this feature.")
        kwargs.setdefault('tol', 1e-10)
        lr.bss_node = import_sklearn.FastICA(**kwargs)
        # The data has already been whitened above.
        lr.bss_node.whiten = False
        lr.bss_node.fit(factors)
        try:
            unmixing_matrix = lr.bss_node.unmixing_matrix_
        except AttributeError:
            # unmixing_matrix was renamed to components
            unmixing_matrix = lr.bss_node.components_
    else:
        if mdp_installed is False:
            raise ImportError('MDP is not installed. Nothing done')
        node_class = getattr(mdp.nodes, algorithm + "Node")
        lr.bss_node = node_class(**kwargs)
        lr.bss_node.train(factors)
        unmixing_matrix = lr.bss_node.get_recmatrix()

    # Fold the whitening transform into the unmixing matrix.
    w = np.dot(unmixing_matrix, invsqcovmat)
    if lr.explained_variance is not None:
        # The output of ICA is not sorted in any way what makes it
        # difficult to compare results from different unmixings. The
        # following code is an experimental attempt to sort them in a
        # more predictable way
        # NOTE(review): the weights are the first `number_of_components`
        # explained variances even when `comp_list` selects non
        # contiguous components -- confirm this is intended.
        sorting_indices = np.argsort(np.dot(
            lr.explained_variance[:number_of_components],
            np.abs(w.T)))[::-1]
        w[:] = w[sorting_indices, :]
    lr.unmixing_matrix = w
    lr.on_loadings = on_loadings
    self._unmix_components()
    self._auto_reverse_bss_component(lr)
    lr.bss_algorithm = algorithm
def blind_source_separation(self,
                            number_of_components=None,
                            algorithm='sklearn_fastica',
                            diff_order=1,
                            diff_axes=None,
                            factors=None,
                            comp_list=None,
                            mask=None,
                            on_loadings=False,
                            pretreatment=None,
                            **kwargs):
    """Blind source separation (BSS) on the result of the decomposition.

    The selected components are optionally differentiated, masked,
    unfolded, centred and whitened, and then passed to the chosen BSS
    algorithm. The unmixing matrix, `on_loadings` and the algorithm name
    are stored in ``self.learning_results``.

    Parameters
    ----------
    number_of_components : int or None
        Number of leading components to pass to the BSS algorithm. When
        given it takes precedence over `comp_list`. If both are None,
        ``learning_results.output_dimension`` is used.
    algorithm : str
        'orthomax', 'sklearn_fastica', or the prefix of an MDP node class
        name; any other value is resolved as
        ``mdp.nodes.<algorithm>Node`` (e.g. 'FastICA', 'JADE', 'CuBICA',
        'TDSEP').
    diff_order : int
        Sometimes it is convenient to perform the BSS on the derivative
        of the signal. If diff_order is 0, the signal is not
        differentiated.
    diff_axes : None or list of ints or strings
        Axes of the main signal along which the differences are
        calculated when `diff_order` is greater than 0. They are
        translated to the corresponding axes of the factors (loadings).
        If None, the choice of axes is left to the derivative helper.
    factors : Signal or numpy array.
        Factors to decompose. If None, the BSS is performed on the
        factors of a previous decomposition. If a Signal instance the
        navigation dimension must be 1 and the size greater than 1. If a
        numpy array (deprecated) the factors are stored in a 2d array
        stacked over the last axis.
    comp_list : iterable of int or None
        Indices of the components to use. It permits choosing non
        contiguous components. Ignored when `number_of_components` is
        given.
    mask : bool numpy array or Signal instance.
        If not None, the signal locations marked as True are masked. The
        mask shape must be equal to the signal shape (navigation shape)
        when `on_loadings` is False (True). A Signal mask is copied
        before use, so the caller's object is left untouched.
    on_loadings : bool
        If True, perform the BSS on the loadings of a previous
        decomposition. If False, performs it on the factors.
    pretreatment : dict
        Not used by this implementation; accepted for backward
        compatibility.
    **kwargs : extra key word arguments
        Any keyword arguments are passed to the BSS algorithm.

    Raises
    ------
    AttributeError
        If `factors` is None and no decomposition results are available.
    ValueError
        If `factors` or `mask` have an unsupported type or shape, or if
        no components are selected.
    ImportError
        If the requested backend (scikit-learn or MDP) is not installed.

    """
    import copy

    from hyperspy.signal import Signal
    from hyperspy._signals.spectrum import Spectrum

    lr = self.learning_results

    if factors is None:
        if not hasattr(lr, 'factors') or lr.factors is None:
            raise AttributeError(
                'A decomposition must be performed before blind '
                'source seperation or factors must be provided.')
        if on_loadings:
            factors = self.get_decomposition_loadings()
        else:
            factors = self.get_decomposition_factors()

    # Check factors
    if not isinstance(factors, Signal):
        if isinstance(factors, np.ndarray):
            warnings.warn(
                "factors as numpy arrays will raise an error in "
                "HyperSpy 0.9 and newer. From them on only passing "
                "factors as HyperSpy Signal instances will be "
                "supported.",
                DeprecationWarning)
            # We proceed supposing that the factors are spectra stacked
            # over the last dimension to reproduce the deprecated
            # behaviour.
            # TODO: Don't forget to change `factors` docstring when
            # removing this.
            factors = Spectrum(factors.T)
        else:
            # Change next error message when removing the
            # DeprecationWarning
            raise ValueError(
                "`factors` must be either a Signal instance or a "
                "numpy array but an object of type %s was provided." %
                type(factors))

    # Check factor dimensions
    if factors.axes_manager.navigation_dimension != 1:
        raise ValueError("`factors` must have navigation dimension "
                         "equal one, but the navigation dimension "
                         "of the given factors is %i." %
                         factors.axes_manager.navigation_dimension)
    elif factors.axes_manager.navigation_size < 2:
        raise ValueError("`factors` must have navigation size "
                         "greater than one, but the navigation "
                         "size of the given factors is %i." %
                         factors.axes_manager.navigation_size)

    # Check mask dimensions
    if mask is not None:
        ref_shape, space = (factors.axes_manager.signal_shape,
                            "navigation" if on_loadings else "signal")
        if isinstance(mask, np.ndarray):
            warnings.warn(
                "Bare numpy array masks are deprecated and will be removed"
                " in next HyperSpy 0.9.",
                DeprecationWarning)
            # The array shape is compared against the reversed signal
            # shape.
            ref_shape = ref_shape[::-1]
            if mask.shape != ref_shape:
                raise ValueError(
                    "The `mask` shape is not equal to the %s shape. "
                    "Mask shape: %s\tSignal shape in array: %s" %
                    (space, str(mask.shape), str(ref_shape)))
            if on_loadings:
                mask = self._get_navigation_signal(data=mask)
            else:
                mask = self._get_signal_signal(data=mask)
        elif isinstance(mask, Signal):
            if mask.axes_manager.signal_shape != ref_shape:
                raise ValueError(
                    "The `mask` signal shape is not equal to the %s shape."
                    " Mask shape: %s\t%s shape:%s" %
                    (space,
                     str(mask.axes_manager.signal_shape),
                     space,
                     str(ref_shape)))
            # The mask is modified in place below (dtype change, NaN
            # marking, unfolding); work on a copy so that the caller's
            # mask is not altered.
            mask = copy.deepcopy(mask)

    # Note that we don't check the factor's signal dimension. This is on
    # purpose as an user may like to apply pretreaments that change their
    # dimensionality.

    # The diff_axes are given for the main signal. We need to compute
    # the correct diff_axes for the factors.
    # Get diff_axes index in axes manager
    if diff_axes is not None:
        diff_axes = [1 + self.axes_manager[axis].index_in_axes_manager
                     for axis in diff_axes]
        if not on_loadings:
            diff_axes = [index - self.axes_manager.navigation_dimension
                         for index in diff_axes]

    # Select components to separate
    if number_of_components is not None:
        comp_list = range(number_of_components)
    elif comp_list is not None:
        number_of_components = len(comp_list)
    elif lr.output_dimension is not None:
        number_of_components = lr.output_dimension
        comp_list = range(number_of_components)
    else:
        raise ValueError(
            "No `number_of_components` or `comp_list` provided.")
    factors = stack([factors.inav[i] for i in comp_list])

    # Apply differences pre-processing if requested.
    if diff_order > 0:
        factors = get_derivative(factors,
                                 diff_axes=diff_axes,
                                 diff_order=diff_order)
        if mask is not None:
            # The following is a little trick to dilate the mask as
            # required when operation on the differences. It exploits the
            # fact that np.diff automatically "dilates" nans. The trick has
            # a memory penalty which should be low compare to the total
            # memory required for the core application in most cases.
            mask_diff_axes = (
                [iaxis - 1 for iaxis in diff_axes]
                if diff_axes is not None else None)
            mask.change_dtype("float")
            mask.data[mask.data == 1] = np.nan
            mask = get_derivative(mask,
                                  diff_axes=mask_diff_axes,
                                  diff_order=diff_order)
            mask.data[np.isnan(mask.data)] = 1
            mask.change_dtype("bool")

    # Unfold in case the signal_dimension > 1
    factors.unfold()
    if mask is not None:
        mask.unfold()
        factors = factors.data.T[~mask.data]
    else:
        factors = factors.data.T

    # Center and scale the data
    factors, invsqcovmat = centering_and_whitening(factors)

    # Perform actual BSS
    if algorithm == 'orthomax':
        _, unmixing_matrix = orthomax(factors, **kwargs)
        unmixing_matrix = unmixing_matrix.T
    elif algorithm == 'sklearn_fastica':
        if not import_sklearn.sklearn_installed:
            raise ImportError(
                "The optional package scikit learn is not installed "
                "and it is required for this feature.")
        kwargs.setdefault('tol', 1e-10)
        lr.bss_node = import_sklearn.FastICA(**kwargs)
        # The data has already been whitened above.
        lr.bss_node.whiten = False
        lr.bss_node.fit(factors)
        try:
            unmixing_matrix = lr.bss_node.unmixing_matrix_
        except AttributeError:
            # unmixing_matrix was renamed to components
            unmixing_matrix = lr.bss_node.components_
    else:
        if mdp_installed is False:
            raise ImportError('MDP is not installed. Nothing done')
        node_class = getattr(mdp.nodes, algorithm + "Node")
        lr.bss_node = node_class(**kwargs)
        lr.bss_node.train(factors)
        unmixing_matrix = lr.bss_node.get_recmatrix()

    # Fold the whitening transform into the unmixing matrix.
    w = np.dot(unmixing_matrix, invsqcovmat)
    if lr.explained_variance is not None:
        # The output of ICA is not sorted in any way what makes it
        # difficult to compare results from different unmixings. The
        # following code is an experimental attempt to sort them in a
        # more predictable way
        # NOTE(review): the weights are the first `number_of_components`
        # explained variances even when `comp_list` selects non
        # contiguous components -- confirm this is intended.
        sorting_indices = np.argsort(np.dot(
            lr.explained_variance[:number_of_components],
            np.abs(w.T)))[::-1]
        w[:] = w[sorting_indices, :]
    lr.unmixing_matrix = w
    lr.on_loadings = on_loadings
    self._unmix_components()
    self._auto_reverse_bss_component(lr)
    lr.bss_algorithm = algorithm