Beispiel #1
0
def test_fastica_nowhiten():
    """Exercise FastICA with ``whiten=False``, including issue #697."""
    data = [[0, 1], [1, 0]]

    # Fit without whitening and make sure the mixing matrix can be queried.
    estimator = FastICA(whiten=False, random_state=0)
    estimator.fit(data)
    estimator.get_mixing_matrix()

    # Regression test for issue #697: this fit is expected to emit exactly
    # one warning, so record everything that is raised.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        estimator = FastICA(n_components=1, whiten=False, random_state=0)
        estimator.fit(data)
        assert_true(len(caught) == 1)
Beispiel #2
0
def test_fastica_nowhiten():
    """Exercise FastICA with ``whiten=False``, including issue #697."""
    samples = [[0, 1], [1, 0]]

    # A non-whitened fit must still allow the mixing matrix to be queried.
    model = FastICA(whiten=False, random_state=0)
    model.fit(samples)
    model.get_mixing_matrix()

    # Regression test for issue #697: record all warnings and check that
    # this fit emits exactly one.
    with warnings.catch_warnings(record=True) as recorded:
        warnings.simplefilter("always")
        model = FastICA(n_components=1, whiten=False, random_state=0)
        model.fit(samples)
        assert_true(len(recorded) == 1)
Beispiel #3
0
def ICAFilter(signal=None):
    """Filter a signal by zeroing its noisiest independent component.

    The signal is decomposed with FastICA, every independent component (IC)
    is scored with two noise metrics, and the IC with the largest combined
    (column-normalised) score is set to zero before the signal is rebuilt
    from the mixing matrix.

    Parameters
    ----------
    signal : array-like
        Data handed to ``FastICA.fit`` / ``transform``. Presumably shaped
        (n_samples, n_channels) -- TODO confirm against callers.

    Returns
    -------
    signalF : ndarray
        Signal recomposed as ``np.dot(IC_, A.T)`` with one IC zeroed.
    IC : ndarray
        The unmodified independent components.
    fr : ndarray
        Un-normalised noise metrics, one row per IC with columns
        ``[max|IC| / variance, |skewness|]``.
    """
    # EEG filtering based on Independent Component Analysis

    # ICA decomposition
    ica = FastICA(whiten=True)
    IC = ica.fit(signal).transform(signal)
    A = ica.get_mixing_matrix()  # signal = np.dot(IC, A.T)

    # noise metrics (one value per component)
    sigma2 = IC.std(ddof=1, axis=0) ** 2
    f1 = np.abs(IC).max(axis=0) / sigma2
    f2 = np.abs(stats.skew(IC, bias=False, axis=0))
    f = np.column_stack((f1, f2))
    fr = f.copy()
    f /= f.max(axis=0)
    # Euclidean norm of each row. Computed directly instead of taking the
    # diagonal of the full f.f^T matrix, which built an n x n intermediate
    # only to discard everything off the diagonal.
    norm = np.sqrt((f ** 2).sum(axis=1))

    # remove noisy IC
    ind = norm.argmax()
    IC_ = IC.copy()
    IC_[:, ind] = 0

    # recompute signal
    signalF = np.dot(IC_, A.T)

    return signalF, IC, fr
def independent_component(x, y):
    """Summarise a FastICA fit of ``x`` as four scalars.

    ``x`` is reshaped into a single column before fitting and ``y`` is
    forwarded to ``fit`` unchanged. Returns the list
    ``[first component entry, first mixing entry, max source, min source]``.
    """
    ica = FastICA(random_state=1)
    column = x.reshape(-1, 1)
    ica.fit(column, y)

    flat_sources = ica.sources_.flatten()
    return [
        ica.components_[0][0],
        ica.get_mixing_matrix()[0][0],
        max(flat_sources),
        min(flat_sources),
    ]
def independent_component(xy):
    """Summarise a FastICA fit of ``x`` as four scalars.

    Parameters
    ----------
    xy : tuple
        A ``(x, y)`` pair. ``x`` is turned into a single-column array and
        fitted; ``y`` is forwarded to ``fit`` unchanged.

    Returns
    -------
    list
        ``[first component entry, first mixing entry, max source,
        min source]``.
    """
    # Tuple parameters (``def f((x, y))``) are a SyntaxError on Python 3
    # (PEP 3113). Unpacking here keeps the single-argument call convention.
    x, y = xy
    column = np.array(x).reshape(len(x), 1)
    ica = FastICA()
    ica.fit(column, y)
    sources = ica.sources_.flatten()
    return [
        ica.components_[0][0],
        ica.get_mixing_matrix()[0][0],
        max(sources),
        min(sources),
    ]
def independent_component(xy):
    """Summarise a FastICA fit of ``x`` as four scalars.

    Parameters
    ----------
    xy : tuple
        A ``(x, y)`` pair. ``x`` is turned into a single-column array and
        fitted; ``y`` is forwarded to ``fit`` unchanged.

    Returns
    -------
    list
        ``[first component entry, first mixing entry, max source,
        min source]``.
    """
    # Tuple parameters (``def f((x, y))``) are a SyntaxError on Python 3
    # (PEP 3113). Unpacking here keeps the single-argument call convention.
    x, y = xy
    newX = np.array(x).reshape(len(x), 1)
    g = FastICA()
    g.fit(newX, y)
    sources = g.sources_.flatten()
    return [
        g.components_[0][0],
        g.get_mixing_matrix()[0][0],
        max(sources),
        min(sources),
    ]
Beispiel #7
0
    def RunICAScikit(q):
      """Time a FastICA fit/transform and report the result on ``q``.

      Reads ``self.dataset`` and ``options`` from the enclosing scope. Puts
      the elapsed time on ``q`` and returns it; on any exception during the
      ICA run, puts and returns -1 instead.
      """
      totalTimer = Timer()

      # Load input dataset.
      data = np.genfromtxt(self.dataset, delimiter=',')

      # Optional "-s <digits>" option sets the random seed (0 if absent).
      # Raw string: '\d' in a plain literal is an invalid escape sequence.
      seedMatch = re.search(r'-s (\d+)', options)
      s = int(seedMatch.group(1)) if seedMatch else 0

      try:
        # Perform ICA. The results are not used; only the run time matters.
        with totalTimer:
          model = FastICA(random_state=s)
          model.fit(data).transform(data)
          model.get_mixing_matrix()
      except Exception:
        q.put(-1)
        return -1

      time = totalTimer.ElapsedTime()
      q.put(time)
      return time
Beispiel #8
0
        def RunICAScikit(q):
            """Time a FastICA fit/transform and report the result on ``q``.

            Reads ``self.dataset`` and ``options`` from the enclosing scope.
            Puts the elapsed time on ``q`` and returns it; on any exception
            during the ICA run, puts and returns -1 instead.
            """
            totalTimer = Timer()

            # Load input dataset.
            data = np.genfromtxt(self.dataset, delimiter=',')

            # Optional "-s <digits>" option sets the random seed (0 if
            # absent). Raw string: '\d' in a plain literal is an invalid
            # escape sequence.
            seedMatch = re.search(r'-s (\d+)', options)
            s = int(seedMatch.group(1)) if seedMatch else 0

            try:
                # Perform ICA. The results are not used; only the run time
                # matters.
                with totalTimer:
                    model = FastICA(random_state=s)
                    model.fit(data).transform(data)
                    model.get_mixing_matrix()
            except Exception:
                q.put(-1)
                return -1

            time = totalTimer.ElapsedTime()
            q.put(time)
            return time
Beispiel #9
0
class ICA(object):
    """M/EEG signal decomposition using Independent Component Analysis (ICA)

    This object can be used to estimate ICA components and then
    remove some from Raw or Epochs for data exploration or artifact
    correction.

    Parameters
    ----------
    n_components : int | float | None
        The number of components used for ICA decomposition. If int, it must
        not be larger than max_n_components. If None, all PCA components will
        be used. If float between 0 and 1, components will be selected by the
        cumulative percentage of explained variance.
    max_n_components : int | None
        The number of components used for PCA decomposition. If None, no
        dimension reduction will be applied and max_n_components will equal
        the number of channels supplied on decomposing data.
    noise_cov : None | instance of mne.cov.Covariance
        Noise covariance used for whitening. If None, channels are just
        z-scored.
    random_state : None | int | instance of np.random.RandomState
        np.random.RandomState to initialize the FastICA estimation.
        As the estimation is non-deterministic it can be useful to
        fix the seed to have reproducible results.
    algorithm : {'parallel', 'deflation'}
        Apply parallel or deflational algorithm for FastICA.
    fun : string or function, optional. Default: 'logcosh'
        The functional form of the G function used in the
        approximation to neg-entropy. Could be either 'logcosh', 'exp',
        or 'cube'.
        You can also provide your own function. It should return a tuple
        containing the value of the function, and of its derivative, in the
        point.
    fun_args : dictionary, optional
        Arguments to send to the functional form.
        If empty and if fun='logcosh', fun_args will take value
        {'alpha' : 1.0}
    verbose : bool, str, int, or None
        If not None, override default verbose level (see mne.verbose).

    Attributes
    ----------
    current_fit : str
        Flag informing about which type of data was last fit: 'unfitted',
        'raw' or 'epochs'.
    ch_names : list-like
        Channel names resulting from initial picking.
    n_components : int
        The number of components used for ICA decomposition.
    max_n_components : int
        The number of PCA dimensions computed.
    verbose : bool, str, int, or None
        See above.
    """
    @verbose
    def __init__(self, n_components, max_n_components=100, noise_cov=None,
                 random_state=None, algorithm='parallel', fun='logcosh',
                 fun_args=None, verbose=None):
        """Validate the settings and build the underlying FastICA estimator.

        See the class docstring for the meaning of the parameters.

        Raises
        ------
        ImportError
            If scikit-learn cannot be imported.
        ValueError
            If n_components exceeds max_n_components, or if a float
            n_components does not satisfy 0 < n_components <= 1.
        """
        try:
            from sklearn.decomposition import FastICA  # to avoid strong dep.
        except ImportError:
            # ImportError is still an Exception, so existing handlers that
            # catch Exception keep working.
            raise ImportError('the scikit-learn package is missing and '
                              'required for ICA')
        self.noise_cov = noise_cov

        # sklearn < 0.11 does not support random_state argument for FastICA
        kwargs = {'algorithm': algorithm, 'fun': fun, 'fun_args': fun_args}

        if random_state is not None:
            aspec = inspect.getargspec(FastICA.__init__)
            if 'random_state' not in aspec.args:
                warnings.warn('random_state argument ignored, update '
                              'scikit-learn to version 0.11 or newer')
            else:
                kwargs['random_state'] = random_state

        # n_components may legitimately be None ("use all PCA components");
        # only compare when both bounds are actual numbers, since ordering
        # None against an int raises TypeError on Python 3.
        if (n_components is not None and max_n_components is not None
                and n_components > max_n_components):
            raise ValueError('n_components must be smaller than '
                             'max_n_components')

        if isinstance(n_components, float):
            if not 0 < n_components <= 1:
                raise ValueError('For selecting ICA components by the '
                                 'explained variance of PCA components the'
                                 ' float value must be between 0.0 and 1.0 ')
            self._explained_var = n_components
            logger.info('Selecting pca_components via explained variance.')
        else:
            # 1.1 is a sentinel above any valid ratio; _prepare_pca tests
            # ``_explained_var > 1.0`` to detect the non-float case.
            self._explained_var = 1.1
            logger.info('Selecting pca_components directly.')

        self._ica = FastICA(**kwargs)
        self.current_fit = 'unfitted'
        self.verbose = verbose
        self.n_components = n_components
        self.max_n_components = max_n_components
        self.ch_names = None
        self._mixing = None

    def __repr__(self):
        """Return a short description of the fit state and component count."""
        # Anything other than 'unfitted' or 'raw' is reported as epochs.
        prefixes = {'unfitted': '(no', 'raw': '(raw data'}
        prefix = prefixes.get(self.current_fit, '(epochs')

        if self.n_components:
            detail = '%s components' % str(self.n_components)
        else:
            detail = 'no dimension reduction'

        return 'ICA ' + prefix + ' decomposition, ' + detail + ')'

    @verbose
    def decompose_raw(self, raw, picks=None, start=None, stop=None,
                      verbose=None):
        """Fit the ICA decomposition on raw data

        Parameters
        ----------
        raw : instance of mne.fiff.Raw
            Raw measurements to be decomposed.
        picks : array-like
            Channels to be included. This selection is kept for the rest of
            the ICA session. If None, only good data channels are used.
        start : int
            First sample to include (first is 0). If omitted, defaults to the
            first sample in data.
        stop : int
            First sample to not include. If omitted, data is included to the
            end.
        verbose : bool, str, int, or None
            If not None, override default verbose level (see mne.verbose).
            Defaults to self.verbose.

        Returns
        -------
        self : instance of ICA
            Returns the modified instance.
        """
        already_fitted = self.current_fit != 'unfitted'
        if already_fitted:
            raise RuntimeError('ICA decomposition has already been fitted. '
                               'Please start a new ICA session.')

        logger.info('Computing signal decomposition on raw data. '
                    'Please be patient, this may take some time')

        # default selection: good MEG/EEG channels only
        if picks is None:
            picks = pick_types(raw.info, meg=True, eeg=True, eog=False,
                               ecg=False, misc=False, stim=False,
                               exclude=raw.info['bads'])

        if self.max_n_components is None:
            self.max_n_components = len(picks)
            logger.info('Inferring max_n_components from picks.')

        # ch_names must be set before _pre_whiten, which reads it
        self.ch_names = [raw.ch_names[idx] for idx in picks]

        segment = raw[picks, start:stop][0]
        whitened, whitener = self._pre_whiten(segment, raw.info, picks)
        self._pre_whitener = whitener

        ica_input, pca = self._prepare_pca(whitened, self.max_n_components)
        self._pca = pca

        self._ica.fit(ica_input)
        self._mixing = self._ica.get_mixing_matrix().T
        self.current_fit = 'raw'

        return self

    @verbose
    def decompose_epochs(self, epochs, picks=None, verbose=None):
        """Fit the ICA decomposition on epochs

        Parameters
        ----------
        epochs : instance of Epochs
            The epochs. The ICA is estimated on the concatenated epochs.
        picks : array-like
            Channels to be included, relative to the channels already picked
            when the epochs were created. This selection is kept for the rest
            of the ICA session.
        verbose : bool, str, int, or None
            If not None, override default verbose level (see mne.verbose).
            Defaults to self.verbose.

        Returns
        -------
        self : instance of ICA
            Returns the modified instance.
        """
        already_fitted = self.current_fit != 'unfitted'
        if already_fitted:
            raise RuntimeError('ICA decomposition has already been fitted. '
                               'Please start a new ICA session.')

        logger.info('Computing signal decomposition on epochs. '
                    'Please be patient, this may take some time')

        bads = epochs.info['bads']

        # default: use the good channels of the epochs, avoiding double
        # picking
        if picks is None:
            picks = pick_types(epochs.info, include=epochs.ch_names,
                               exclude=bads)

        data_picks = pick_types(epochs.info, meg=True, eeg=True, eog=False,
                                ecg=False, misc=False, stim=False,
                                exclude=bads)

        # filter out all the channels the raw wouldn't have initialized
        picks = np.intersect1d(data_picks, picks)

        # ch_names must be set before _pre_whiten, which reads it
        self.ch_names = [epochs.ch_names[idx] for idx in picks]

        if self.max_n_components is None:
            self.max_n_components = len(picks)
            logger.info('Inferring max_n_components from picks.')

        # epochs are stacked along the time axis before whitening
        stacked = np.hstack(epochs.get_data()[:, picks])
        whitened, whitener = self._pre_whiten(stacked, epochs.info, picks)
        self._pre_whitener = whitener

        ica_input, pca = self._prepare_pca(whitened, self.max_n_components)
        self._pca = pca

        self._ica.fit(ica_input)
        self._mixing = self._ica.get_mixing_matrix().T
        self.current_fit = 'epochs'

        return self

    def get_sources_raw(self, raw, start=None, stop=None):
        """Compute the ICA sources of raw data with the fitted decomposition

        Parameters
        ----------
        raw : instance of Raw
            Raw object to draw sources from.
        start : int
            First sample to include (first is 0). If omitted, defaults to the
            first sample in data.
        stop : int
            First sample to not include.
            If omitted, data is included to the end.

        Returns
        -------
        sources : array, shape = (n_components, n_times)
            The ICA sources time series.
        """
        fitted = self._mixing is not None
        if not fitted:
            raise RuntimeError('No fit available. Please first fit ICA '
                               'decomposition.')

        # the helper also returns the PCA data, which is not needed here
        result = self._get_sources_raw(raw, start, stop)
        return result[0]

    def _get_sources_raw(self, raw, start, stop):
        """Return (sources, pca_data) for the fitted channels of raw"""
        # look up the fitted channels by name in this raw object
        picks = [raw.ch_names.index(name) for name in self.ch_names]

        segment = raw[picks, start:stop][0]
        whitened, _ = self._pre_whiten(segment, raw.info, picks)

        pca_data = self._pca.transform(whitened.T)
        ica_input = pca_data[:, self._comp_idx]
        raw_sources = self._ica.transform(ica_input).T

        return raw_sources, pca_data

    def get_sources_epochs(self, epochs, concatenate=False):
        """Compute the ICA sources of epochs with the fitted decomposition

        Parameters
        ----------
        epochs : instance of Epochs
            Epochs object to draw sources from.
        concatenate : bool
            If true, epochs and time slices will be concatenated.

        Returns
        -------
        epochs_sources : ndarray of shape (n_epochs, n_sources, n_times)
            The sources for each epoch
        """
        fitted = self._mixing is not None
        if not fitted:
            raise RuntimeError('No fit available. Please first fit ICA '
                               'decomposition.')

        # the helper also returns the PCA data, which is not needed here
        result = self._get_sources_epochs(epochs, concatenate)
        return result[0]

    def _get_sources_epochs(self, epochs, concatenate):
        """Return (sources, pca_data) for the fitted channels of epochs"""
        picks = pick_types(epochs.info, include=self.ch_names,
                           exclude=epochs.info['bads'])

        # special case where epochs come picked but fit was 'unpicked'.
        n_fitted, n_supplied = len(self.ch_names), len(picks)
        if n_supplied != n_fitted:
            raise RuntimeError('Epochs don\'t match fitted data: %i channels '
                               'fitted but %i channels supplied. \nPlease '
                               'provide Epochs compatible with '
                               'ica.ch_names' % (n_fitted, n_supplied))

        # epochs are stacked along the time axis before whitening
        stacked = np.hstack(epochs.get_data()[:, picks])
        whitened, _ = self._pre_whiten(stacked, epochs.info, picks)

        pca_data = self._pca.transform(whitened.T)
        ica_input = pca_data[:, self._comp_idx]
        flat_sources = self._ica.transform(ica_input).T

        # cut the continuous sources back into one chunk per epoch
        chunks = np.split(flat_sources, len(epochs.events), 1)
        sources = np.array(chunks)

        if concatenate:
            sources = np.hstack(sources)

        return sources, pca_data

    def export_sources(self, raw, picks=None, start=None, stop=None):
        """Export sources as raw object

        The returned object is a copy of ``raw`` whose data consists of the
        ICA sources followed by the additionally picked channels.

        Parameters
        ----------
        raw : instance of Raw
            Raw object to export sources from.
        picks : array-like
            Channels to be included in addition to the sources. If None,
            artifact and stimulus channels will be included.
        start : int
            First sample to include (first is 0). If omitted, defaults to the
            first sample in data.
        stop : int
            First sample to not include. If omitted, data is included to the
            end.

        Returns
        -------
        out : instance of mne.Raw
            Container object for ICA sources

        Raises
        ------
        ValueError
            If ``raw`` is not preloaded.
        """
        if not raw._preloaded:
            raise ValueError('raw data should be preloaded to have this '
                             'working. Please read raw data with '
                             'preload=True.')

        # include 'reference' channels for comparison with ICA
        if picks is None:
            picks = pick_types(raw.info, meg=False, eeg=False, misc=True,
                               ecg=True, eog=True, stim=True)

        # merge copied instance and picked data with sources
        out = raw.copy()
        out.fids = []
        sources = self.get_sources_raw(raw, start=start, stop=stop)
        out._data = np.r_[sources, raw[picks, start:stop][0]]

        # update first and last samples
        out.first_samp = raw.first_samp + (start if start else 0)
        # Derive the last sample from the exported data itself. The previous
        # ``first_samp + stop`` counted ``start`` twice and was off by one.
        # NOTE(review): assumes last_samp is inclusive, i.e.
        # n_times == last_samp - first_samp + 1 -- confirm against Raw.
        out.last_samp = out.first_samp + out._data.shape[1] - 1

        # set channel names and info
        ch_names = out.info['ch_names'] = []
        ch_info = out.info['chs'] = []
        for i in range(self.n_components):
            ch_name = 'ICA %03d' % (i + 1)
            ch_names.append(ch_name)
            # 'coord_frame' was previously misspelled 'coord_Frame', so the
            # entry was stored under a key that differs only by case.
            ch_info.append(dict(ch_name=ch_name, cal=1,
                logno=i + 1, coil_type=FIFF.FIFFV_COIL_NONE,
                kind=FIFF.FIFFV_MISC_CH, coord_frame=FIFF.FIFFV_COORD_UNKNOWN,
                loc=np.array([0.,  0.,  0.,  1., 0.,  0.,  0.,  1.,
                              0.,  0.,  0.,  1.], dtype=np.float32),
                unit=FIFF.FIFF_UNIT_NONE, eeg_loc=None, range=1.0,
                scanno=i + 1, unit_mul=0, coil_trans=None))

        # re-append additionally picked ch_names
        ch_names += [raw.ch_names[k] for k in picks]
        # re-append additionally picked ch_info
        ch_info += [raw.info['chs'][k] for k in picks]

        # update number of channels
        out.info['nchan'] = len(picks) + self.n_components

        return out

    def plot_sources_raw(self, raw, order=None, start=None, stop=None,
                         n_components=None, source_idx=None, ncol=3, nrow=10,
                         show=True):
        """Create panel plots of ICA sources. Wrapper around viz.plot_ica_panel

        Parameters
        ----------
        raw : instance of mne.fiff.Raw
            Raw object to plot the sources from.
        order : ndarray | None.
            Index of length n_components. If None, plot will show the sources
            in the order as fitted.
            Example: arg_sort = np.argsort(np.var(sources)).
        start : int
            X-axis start index. If None from the beginning.
        stop : int
            X-axis stop index. If None to the end.
        n_components : int
            Number of components fitted.
        source_idx : array-like
            Indices for subsetting the sources.
        ncol : int
            Number of panel-columns.
        nrow : int
            Number of panel-rows.
        show : bool
            If True, plot will be shown, else just the figure is returned.

        Returns
        -------
        fig : instance of pyplot.Figure

        Raises
        ------
        ValueError
            If ``order`` does not have one entry per source.
        """

        sources = self.get_sources_raw(raw, start=start, stop=stop)

        if order is not None:
            if len(order) != sources.shape[0]:
                raise ValueError('order and sources have to be of the '
                                 'same length.')
            sources = sources[order]

        # The sources are already cropped to [start, stop), so the plot range
        # is expressed relative to that crop. ``start or 0`` keeps the
        # subtraction valid when only ``stop`` was given; previously that
        # case raised TypeError from ``stop - None``.
        plot_start = 0 if start is not None else start
        plot_stop = (stop - (start or 0)) if stop is not None else stop

        fig = plot_ica_panel(sources, start=plot_start, stop=plot_stop,
                             n_components=n_components, source_idx=source_idx,
                             ncol=ncol, nrow=nrow)
        if show:
            import matplotlib.pylab as pl
            pl.show()

        return fig

    def plot_sources_epochs(self, epochs, epoch_idx=None, order=None,
                            start=None, stop=None, n_components=None,
                            source_idx=None, ncol=3, nrow=10, show=True):
        """Create panel plots of ICA sources. Wrapper around viz.plot_ica_panel

        Parameters
        ----------
        epochs : instance of mne.Epochs
            Epochs object to plot the sources from.
        epoch_idx : int
            Index to plot particular epoch. If None, the sources of all
            epochs are concatenated and plotted together.
        order : ndarray | None.
            Index of length n_components. If None, plot will show the sources
            in the order as fitted.
            Example: arg_sort = np.argsort(np.var(sources)).
        start : int
            X-axis start index. If None from the beginning.
        stop : int
            X-axis stop index. If None to the end.
        n_components : int
            Number of components fitted.
        source_idx : array-like
            Indices for subsetting the sources.
        ncol : int
            Number of panel-columns.
        nrow : int
            Number of panel-rows.
        show : bool
            If True, plot will be shown, else just the figure is returned.

        Returns
        -------
        fig : instance of pyplot.Figure

        Raises
        ------
        ValueError
            If ``order`` does not have one entry per source.
        """
        sources = self.get_sources_epochs(epochs,
                                          concatenate=epoch_idx is None)
        # per-epoch sources keep the epoch axis first, so the source axis
        # moves to position 1
        source_dim = 1 if sources.ndim > 2 else 0
        if order is not None:
            if len(order) != sources.shape[source_dim]:
                raise ValueError('order and sources have to be of the '
                                 'same length.')
            sources = sources[:, order] if source_dim else sources[order]

        # Only select an epoch when one was requested: indexing with None
        # would prepend a new axis to the concatenated sources instead of
        # leaving them unchanged.
        to_plot = sources if epoch_idx is None else sources[epoch_idx]

        fig = plot_ica_panel(to_plot, start=start, stop=stop,
                             n_components=n_components, source_idx=source_idx,
                             ncol=ncol, nrow=nrow)
        if show:
            import matplotlib.pylab as pl
            pl.show()

        return fig

    def find_sources_raw(self, raw, target=None, score_func='pearsonr',
                         start=None, stop=None):
        """Find sources based on own distribution or based on similarity to
        other sources or between source and target.

        Parameters
        ----------
        raw : instance of Raw
            Raw object to draw sources from.
        target : array-like | ch_name | None
            Signal to which the sources shall be compared. It has to be of
            the same shape as the sources. If some string is supplied, a
            routine will try to find a matching channel. If None, a score
            function expecting only one input-array argument must be used,
            for instance, scipy.stats.skew.
        score_func : callable | str label
            Callable taking as arguments either two input arrays
            (e.g. pearson correlation) or one input
            array (e. g. skewness) and returns a float. For convenience the
            most common score_funcs are available via string labels: Currently,
            all distance metrics from scipy.spatial and all functions from
            scipy.stats taking compatible input arguments are supported. These
            function have been modified to support iteration over the rows of a
            2D array. Defaults to 'pearsonr'.
        start : int
            First sample to include (first is 0). If omitted, defaults to the
            first sample in data.
        stop : int
            First sample to not include.
            If omitted, data is included to the end.

        Returns
        -------
        scores : ndarray
            scores for each source as returned from score_func

        Raises
        ------
        ValueError
            If the target does not have as many time slices as the sources.
        """
        # auto source drawing
        sources = self.get_sources_raw(raw=raw, start=start, stop=stop)

        # auto target selection
        if target is not None:
            # promote a 1D array target to a single row
            if hasattr(target, 'ndim'):
                if target.ndim < 2:
                    target = target.reshape(1, target.shape[-1])
            # a string target names a channel to read from raw
            if isinstance(target, str):
                pick = _get_target_ch(raw, target)
                target, _ = raw[pick, start:stop]
            if sources.shape[1] != target.shape[1]:
                # message previously read "...the samenumber of..."
                raise ValueError('Source and targets do not have the same '
                                 'number of time slices.')
            target = target.ravel()

        return _find_sources(sources, target, score_func)

    def find_sources_epochs(self, epochs, target=None, score_func='pearsonr'):
        """Find sources based on relations between source and target

        Parameters
        ----------
        epochs : instance of Epochs
            Epochs object to draw sources from.
        target : array-like | ch_name | None
            Signal to which the sources shall be compared. It has to be of
            the same shape as the sources. If some string is supplied, a
            routine will try to find a matching channel. If None, a score
            function expecting only one input-array argument must be used,
            for instance, scipy.stats.skew.
        score_func : callable | str label
            Callable taking as arguments either two input arrays
            (e.g. pearson correlation) or one input
            array (e. g. skewness) and returns a float. For convenience the
            most common score_funcs are available via string labels: Currently,
            all distance metrics from scipy.spatial and all functions from
            scipy.stats taking compatible input arguments are supported. These
            function have been modified to support iteration over the rows of a
            2D array. Defaults to 'pearsonr'.

        Returns
        -------
        scores : ndarray
            scores for each source as returned from score_func

        Raises
        ------
        ValueError
            If the target does not have as many time slices as the sources.
        """
        sources = self.get_sources_epochs(epochs=epochs)
        # auto target selection
        if target is not None:
            # promote a lower-dimensional array target to 3D
            if hasattr(target, 'ndim'):
                if target.ndim < 3:
                    target = target.reshape(1, 1, target.shape[-1])
            # a string target names a channel to read from the epochs
            if isinstance(target, str):
                pick = _get_target_ch(epochs, target)
                target = epochs.get_data()[:, pick]
            if sources.shape[2] != target.shape[2]:
                # message previously read "...the samenumber of..."
                raise ValueError('Source and targets do not have the same '
                                 'number of time slices.')
            target = target.ravel()

        return _find_sources(np.hstack(sources), target, score_func)

    def pick_sources_raw(self, raw, include=None, exclude=None,
                         n_pca_components=64, start=None, stop=None,
                         copy=True):
        """Rebuild raw data from a selection of ICA sources

        Parameters
        ----------
        raw : instance of Raw
            Raw object to remove ICA components from.
        include : list-like | None
            The source indices to use. If None all are used.
        exclude : list-like | None
            The source indices to remove. If None all are used.
        n_pca_components:
            The number of PCA components to be unwhitened, where n_components
            is the lower bound and max_n_components the upper bound.
            If greater than self.n_components, the PCA components that were not
            supplied to the ICA will get re-attached. This can be used to take
            back the PCA dimension reduction.
        start : int | None
            The first time index to include.
        stop : int | None
            The first time index to exclude.
        copy: bool
            If True, return a modified copy; otherwise modify raw in place.

        Returns
        -------
        raw : instance of Raw
            raw instance with selected ICA components removed
        """
        if not raw._preloaded:
            raise ValueError('raw data should be preloaded to have this '
                             'working. Please read raw data with '
                             'preload=True.')

        fitted_on_raw = self.current_fit == 'raw'
        if not fitted_on_raw:
            raise ValueError('Currently no raw data fitted.'
                             'Please fit raw data first.')

        sources, pca_data = self._get_sources_raw(raw, start=start, stop=stop)
        recomposed = self._pick_sources(sources, pca_data, include, exclude,
                                        n_pca_components)

        if copy is True:
            raw = raw.copy()

        # write the recomposed data back into the fitted channels
        channel_idx = [raw.ch_names.index(name) for name in self.ch_names]
        raw[channel_idx, start:stop] = recomposed
        return raw

    def pick_sources_epochs(self, epochs, include=None, exclude=None,
                            n_pca_components=64, copy=True):
        """Rebuild epochs from a selection of ICA sources

        Parameters
        ----------
        epochs : instance of Epochs
            Epochs object to remove ICA components from.
        include : list-like | None
            The source indices to use. If None all are used.
        exclude : list-like | None
            The source indices to remove. If None all are used.
        n_pca_components:
            The number of PCA components to be unwhitened, where n_components
            is the lower bound and max_n_components the upper bound.
            If greater than self.n_components, the PCA components that were not
            supplied to the ICA will get re-attached. This can be used to take
            back the PCA dimension reduction.
        copy : bool
            If True, return a modified copy; otherwise modify epochs in place.

        Returns
        -------
        epochs : instance of Epochs
            Epochs with selected ICA components removed.
        """
        if not epochs.preload:
            raise ValueError('raw data should be preloaded to have this '
                             'working. Please read raw data with '
                             'preload=True.')

        # concatenated sources, i.e. the sources dimension comes first
        sources, pca_data = self._get_sources_epochs(epochs, True)
        picks = pick_types(epochs.info, include=self.ch_names,
                           exclude=epochs.info['bads'])

        if copy is True:
            epochs = epochs.copy()

        recomposed = self._pick_sources(sources, pca_data, include, exclude,
                                        n_pca_components)

        # restore epochs, channels, tsl order
        per_epoch = np.split(recomposed, len(epochs.events), 1)
        epochs._data[:, picks] = np.array(per_epoch)
        epochs.preload = True

        return epochs

    def _pre_whiten(self, data, info, picks):
        """Scale or whiten data; return (data, pre_whitener)"""
        if self.noise_cov is None:
            # no covariance given: standardize by the overall std, in place
            scale = np.std(data) ** -1
            data *= scale
            return data, scale

        # work on a copy so the stored covariance is left untouched
        ncov = deepcopy(self.noise_cov)
        if ncov.ch_names != self.ch_names:
            ncov['data'] = ncov.data[picks][:, picks]
        assert data.shape[0] == ncov.data.shape[0]
        whitener, _ = compute_whitener(ncov, info, picks)
        whitened = np.dot(whitener, data)

        return whitened, whitener

    def _prepare_pca(self, data, max_n_components):
        """Run PCA on data and select the components passed on to ICA

        Sets self._comp_idx and self.n_components as a side effect and
        returns (to_ica, pca).
        """
        from sklearn.decomposition import RandomizedPCA

        # sklearn < 0.11 does not support random_state argument
        pca_kwargs = {'n_components': max_n_components, 'whiten': False}

        supported_args = inspect.getargspec(RandomizedPCA.__init__).args
        if 'random_state' in supported_args:
            pca_kwargs['random_state'] = 0
        else:
            warnings.warn('RandomizedPCA does not support random_state '
                          'argument. Use scikit-learn to version 0.11 '
                          'or newer to get reproducible results.')

        pca = RandomizedPCA(**pca_kwargs)
        pca_data = pca.fit_transform(data.T)

        # _explained_var above 1.0 marks the "not selected by variance" case
        by_count = self._explained_var > 1.0
        if by_count and self.n_components is not None:  # normal n case
            self._comp_idx = np.arange(self.n_components)
            to_ica = pca_data[:, self._comp_idx]
        elif by_count:  # None case: keep every PCA component
            to_ica = pca_data
            self.n_components = pca_data.shape[1]
            self._comp_idx = np.arange(self.n_components)
        else:  # float case: select by cumulative explained variance
            cumulative = pca.explained_variance_ratio_.cumsum()
            self._comp_idx = np.where(cumulative < self._explained_var)[0]
            to_ica = pca_data[:, self._comp_idx]
            self.n_components = len(self._comp_idx)

        return to_ica, pca

    def _pick_sources(self, sources, pca_data, include, exclude,
                      n_pca_components):
        """Recompose sensor-space data from selected sources

        Parameters
        ----------
        sources : ndarray
            ICA sources with the source dimension first. Modified in place:
            deselected rows are set to zero.
        pca_data : ndarray
            PCA-transformed data; columns beyond self.n_components are
            re-attached when n_pca_components asks for them.
        include : list-like | None
            Source indices to keep. None or empty means no restriction.
        exclude : list-like | None
            Source indices to zero. Only used when include is None or empty.
        n_pca_components : int
            Number of PCA components to restore.

        Returns
        -------
        out : ndarray
            The recomposed data, transposed back so channels come first.

        Raises
        ------
        ValueError
            If n_pca_components is not between n_components and
            max_n_components.
        """
        if not(self.n_components <= n_pca_components <= self.max_n_components):
            raise ValueError('n_pca_components must be between n_components'
                             ' and max_n_components.')

        # ``x not in (None, [])`` compares with ``==``, which is element-wise
        # (and then ambiguous as a truth value) for numpy index arrays.
        # np.size gives a plain count for lists, arrays and scalars alike.
        if include is not None and np.size(include) > 0:
            mute = np.setdiff1d(np.arange(len(sources)), include)
            sources[mute, :] = 0.  # include via exclusion
        elif exclude is not None and np.size(exclude) > 0:
            sources[exclude, :] = 0.  # just exclude

        # restore pca data
        mixing = self._mixing.copy()
        pca_restored = np.dot(sources.T, mixing)

        # re-append deselected pca dimension if desired
        if n_pca_components - self.n_components > 0:
            add_components = np.arange(self.n_components, n_pca_components)
            pca_reappend = pca_data[:, add_components]
            pca_restored = np.c_[pca_restored, pca_reappend]

        # restore sensor space data
        out = _inverse_t_pca(pca_restored, self._pca)

        # restore scaling
        pre_whitener = self._pre_whitener.copy()
        if self.noise_cov is None:  # revert standardization
            pre_whitener **= -1
            out *= pre_whitener
        else:
            out = np.dot(out, linalg.pinv(pre_whitener))

        return out.T
                      scale=6,
                      color=color)

    pl.hlines(0, -3, 3)
    pl.vlines(0, -3, 3)
    pl.xlim(-3, 3)
    pl.ylim(-3, 3)
    pl.xlabel('x')
    pl.ylabel('y')


# Panel 1 of the 2x2 grid: the true sources, scaled by their overall std.
pl.subplot(2, 2, 1)
plot_samples(S / S.std())
pl.title('True Independent Sources')

# Panel 2: the observations, with the transposed PCA components and the ICA
# mixing matrix handed to plot_samples as axis_list (labelled 'PCA' / 'ICA').
axis_list = [pca.components_.T, ica.get_mixing_matrix()]
pl.subplot(2, 2, 2)
plot_samples(X / np.std(X), axis_list=axis_list)
pl.legend(['PCA', 'ICA'], loc='upper left')
pl.title('Observations')

# Panel 3: PCA scores, each column scaled by its own std (axis=0).
pl.subplot(2, 2, 3)
plot_samples(S_pca_ / np.std(S_pca_, axis=0))
pl.title('PCA scores')

# Panel 4: ICA estimates, scaled by one overall std (no axis argument).
pl.subplot(2, 2, 4)
plot_samples(S_ica_ / np.std(S_ica_))
pl.title('ICA estimated sources')

# Positional arguments are left, bottom, right, top, wspace, hspace.
pl.subplots_adjust(0.09, 0.04, 0.94, 0.94, 0.26, 0.26)
Beispiel #11
0
class ICA(object):
    """
    Wrapper for sklearn package.  Performs fast ICA (Independent Component Analysis)

    Every constructor argument is stored on the instance.  All of them except
    ``catalog_name`` are forwarded to ``FastICA`` (``num_components`` is
    passed as ``n_components``).

    ICA has 5 methods:
       - fit(waveforms)
       update class instance with ICA fit

       - fit_transform(waveforms)
       do what fit() does, but additionally return the projection onto ICA space

       - inverse_transform(A)
       inverses the decomposition, returns waveforms for an input A, using Z

       - get_params()
       returns metadata used for fits.

       - get_basis()
       returns the mixing matrix estimated by the fit.
    """
    def __init__(self,
                 num_components=10,
                 catalog_name='unknown',
                 whiten=True,
                 fun='logcosh',
                 fun_args=None,
                 max_iter=600,
                 tol=.00001,
                 w_init=None,
                 random_state=None,
                 algorithm='parallel'):

        self._decomposition = 'Fast ICA'
        self._num_components = num_components
        self._catalog_name = catalog_name
        self._whiten = whiten
        self._fun = fun
        self._fun_args = fun_args
        self._max_iter = max_iter
        self._tol = tol
        self._w_init = w_init
        self._random_state = random_state
        self._algorithm = algorithm

        self._ICA = FastICA(n_components=self._num_components,
                            whiten=self._whiten,
                            fun=self._fun,
                            fun_args=self._fun_args,
                            max_iter=self._max_iter,
                            tol=self._tol,
                            w_init=self._w_init,
                            random_state=self._random_state,
                            algorithm=self._algorithm)

    def fit(self, waveforms):
        """Fit the wrapped FastICA estimator on ``waveforms``.

        The input is kept on the instance as ``_waveforms``.  Nothing is
        returned.
        """
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._ICA.fit(self._waveforms)

    def fit_transform(self, waveforms):
        """Fit on ``waveforms`` and return their projection onto ICA space.

        The projection is also cached on the instance as ``_A``.
        """
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._A = self._ICA.fit_transform(self._waveforms)
        return self._A

    def inverse_transform(self, A):
        """Map ICA-space coefficients ``A`` back to waveforms using the fit."""
        return self._ICA.inverse_transform(A)

    def get_params(self):
        """Return the FastICA parameters as a dict.

        ``n_components`` is renamed to ``num_components`` and the
        decomposition name is added under ``'Decompositon'``.  The spelling
        of that key is kept as-is because existing callers may read it.
        """
        # TODO know what catalog was used! (include waveform metadata)
        params = self._ICA.get_params()
        params['num_components'] = params.pop('n_components')
        params['Decompositon'] = self._decomposition
        return params

    def get_basis(self):
        r""" Return the ICA basis vectors (Z^\dagger)

        Uses the estimator's ``mixing_`` attribute when it is available and
        falls back to the legacy ``get_mixing_matrix()`` method otherwise, so
        the wrapper works with both old and new scikit-learn releases.
        """
        mixing = getattr(self._ICA, 'mixing_', None)
        if mixing is not None:
            return mixing
        return self._ICA.get_mixing_matrix()
Beispiel #12
0
class ICA(object):
    """
    Wrapper for sklearn package.  Performs fast ICA (Independent Component Analysis)

    The constructor keeps each argument on the instance and hands everything
    except ``catalog_name`` to ``FastICA`` (``num_components`` becomes
    ``n_components``).

    ICA has 5 methods:
       - fit(waveforms)
       update class instance with ICA fit

       - fit_transform(waveforms)
       do what fit() does, but additionally return the projection onto ICA space

       - inverse_transform(A)
       inverses the decomposition, returns waveforms for an input A, using Z

       - get_params()
       returns metadata used for fits.

       - get_basis()
       returns the mixing matrix estimated by the fit.
    """
    def __init__(self, num_components=10,
                 catalog_name='unknown',
                 whiten=True,
                 fun='logcosh',
                 fun_args=None,
                 max_iter=600,
                 tol=.00001,
                 w_init=None,
                 random_state=None,
                 algorithm='parallel'):

        self._decomposition = 'Fast ICA'
        self._num_components = num_components
        self._catalog_name = catalog_name
        self._whiten = whiten
        self._fun = fun
        self._fun_args = fun_args
        self._max_iter = max_iter
        self._tol = tol
        self._w_init = w_init
        self._random_state = random_state
        self._algorithm = algorithm

        self._ICA = FastICA(n_components=self._num_components,
                            whiten=self._whiten,
                            fun=self._fun,
                            fun_args=self._fun_args,
                            max_iter=self._max_iter,
                            tol=self._tol,
                            w_init=self._w_init,
                            random_state=self._random_state,
                            algorithm=self._algorithm)

    def fit(self, waveforms):
        """Fit the wrapped FastICA estimator on ``waveforms``.

        The input is stored as ``_waveforms``; the method returns nothing.
        """
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._ICA.fit(self._waveforms)

    def fit_transform(self, waveforms):
        """Fit on ``waveforms`` and return the projection onto ICA space.

        The returned projection is also stored as ``_A``.
        """
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._A = self._ICA.fit_transform(self._waveforms)
        return self._A

    def inverse_transform(self, A):
        """Convert ICA-space coefficients ``A`` back to waveforms."""
        return self._ICA.inverse_transform(A)

    def get_params(self):
        """Return the FastICA parameters as a dict.

        ``n_components`` is exposed as ``num_components`` and the
        decomposition name is stored under ``'Decompositon'`` (key spelling
        deliberately left unchanged for existing callers).
        """
        # TODO know what catalog was used! (include waveform metadata)
        params = self._ICA.get_params()
        params['num_components'] = params.pop('n_components')
        params['Decompositon'] = self._decomposition
        return params

    def get_basis(self):
        r""" Return the ICA basis vectors (Z^\dagger)

        Reads the estimator's ``mixing_`` attribute when present.  Estimators
        that only offer the legacy ``get_mixing_matrix()`` method are still
        supported through a fallback call.
        """
        mixing = getattr(self._ICA, 'mixing_', None)
        if mixing is not None:
            return mixing
        return self._ICA.get_mixing_matrix()
Beispiel #13
0
import numpy as np
import os
from sklearn.decomposition import FastICA

# Read the comma-separated observations.
data = np.genfromtxt('../data/radical.csv', delimiter=',')

# Perform FastICA: fit the model first, then project the same data.
ica = FastICA()
ica.fit(data)
S = ica.transform(data)
A = ica.get_mixing_matrix()

# Show the estimated sources followed by the mixing matrix.
print(S)
print(A)
            if not x==y:  # skip diagonal
                plt.text(x-0.3,y,'%.2f'%cc_pcorr[x,y],size=6,color='red')

    plt.savefig(basedir+'9_correlation_analysis/pcorr_corrcoefs.pdf',format='pdf')


#do clustering

# "if 1==1" is an always-on switch for this section.
if 1==1:
    # Pairwise distances between the rows of data_pcorr, skipping the first
    # two rows. NOTE(review): pdist/linkage/dendrogram are presumably the
    # scipy versions -- confirm against the imports above this view.
    dst=pdist(data_pcorr[2:,:])
    # Hierarchical clustering with the 'complete' linkage method.
    Z=linkage(dst,method='complete')
    plt.figure(figsize=(14,12))
    dendrogram(Z,labels=tasknames_pcorr)
    plt.savefig(basedir+'9_correlation_analysis/pcorr_task_cluster.pdf',format='pdf')

# decompose connections using ICA and save adjacency matrices

# Same row selection as used for the clustering above.
data_pcorr_fmri=data_pcorr[2:,:]

# "if 1==0" disables this section: the ICA below never runs as written.
if 1==0:
    ica = FastICA(n_components=20)
    S_ = ica.fit(data_pcorr_fmri.T).transform(data_pcorr_fmri.T)  # Get the estimated sources
    A_ = ica.get_mixing_matrix()  # Get estimated mixing matrix


                                  #ncomps=20
                                  #nmf=decomposition.ProjectedGradientNMF(n_components=ncomps,sparseness='components',init='nndsvd')
                                  #nmf.fit(data_pcorr_fmri+100)
#comps=nmf.components_

Beispiel #15
0
import numpy as np
import os
from sklearn.decomposition import FastICA

# Read the comma-separated observations.
data = np.genfromtxt('../data/radical.csv', delimiter=',')

# Perform FastICA in two steps: fit, then transform the same data.
ica = FastICA()
ica.fit(data)
S = ica.transform(data)
A = ica.get_mixing_matrix()

# Show the estimated sources, then the mixing matrix.
print(S)
print(A)
Beispiel #16
0
def test_fastica_simple(add_noise=False):
    """Check that FastICA recovers two known sources from a 2x2 mixture.

    Runs the ``fastica`` function for every combination of algorithm,
    non-linearity and whitening mode, then exercises the ``FastICA`` class,
    including its rejection of invalid ``fun`` arguments.  With
    ``add_noise`` set, Gaussian noise is added to the mixture and the
    recovery tolerance is loosened.
    """
    rng = np.random.RandomState(0)
    # scipy.stats draws from the global RNG, so that one is seeded as well.
    np.random.seed(0)
    n_samples = 1000

    # Two sources: a square wave and Student-t samples (1 degree of freedom).
    square_wave = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1
    student_t = stats.t.rvs(1, size=n_samples)
    s = np.c_[square_wave, student_t].T
    center_and_norm(s)
    s1, s2 = s

    # Mix the sources with a matrix built from a fixed angle.
    phi = 0.6
    cos_phi, sin_phi = np.cos(phi), np.sin(phi)
    mixing = np.array([[cos_phi, sin_phi], [sin_phi, -cos_phi]])
    m = np.dot(mixing, s)

    if add_noise:
        m += 0.1 * rng.randn(2, 1000)

    center_and_norm(m)

    # A callable non-linearity returning (g(x), g'(x)).
    def g_test(x):
        return x ** 3, 3 * x ** 2

    algos = ["parallel", "deflation"]
    nls = ["logcosh", "exp", "cube", g_test]
    whitening = [True, False]
    # Looser tolerance when noise was added to the mixture.
    decimal = 1 if add_noise else 2

    for algo, nl, whiten in itertools.product(algos, nls, whitening):
        if whiten:
            observed = m.T
            extra = {}
        else:
            # Whiten up front with PCA and tell fastica not to do it again.
            observed = PCA(n_components=2, whiten=True).fit_transform(m.T)
            extra = {'whiten': False}
        k_, mixing_, s_ = fastica(observed, fun=nl, algorithm=algo, **extra)
        assert_raises(ValueError, fastica, observed, fun=np.tanh,
                      algorithm=algo)
        s_ = s_.T

        # The mixing model described in the docstring must hold.
        if whiten:
            assert_almost_equal(s_, np.dot(np.dot(mixing_, k_), m))

        center_and_norm(s_)
        est1, est2 = s_
        # The estimates may come back in the opposite order; undo that.
        if abs(np.dot(est1, s2)) > abs(np.dot(est1, s1)):
            est1, est2 = est2, est1
        # Resolve the sign ambiguity of each estimate.
        est1 *= np.sign(np.dot(est1, s1))
        est2 *= np.sign(np.dot(est2, s2))

        # Each estimate must correlate with its original source.
        assert_almost_equal(np.dot(est1, s1) / n_samples, 1, decimal=decimal)
        assert_almost_equal(np.dot(est2, s2) / n_samples, 1, decimal=decimal)

    # Test the FastICA class; nl and algo keep their last loop values.
    ica = FastICA(fun=nl, algorithm=algo, random_state=0)
    ica.fit(m.T)
    ica.get_mixing_matrix()
    assert_true(ica.components_.shape == (2, 2))
    assert_true(ica.sources_.shape == (1000, 2))

    # Unsupported callables and unknown names must raise ValueError on fit.
    for fn in [np.tanh, "exp(-.5(x^2))"]:
        ica = FastICA(fun=fn, algorithm=algo, random_state=0)
        assert_raises(ValueError, ica.fit, m.T)

    assert_raises(TypeError, FastICA(fun=xrange(10)).fit, m.T)
Beispiel #17
0
def clusterize_dirichlet(*args, **kwargs):
    """ Clustering and plotting with Dirichlet process GMM

    Each positional argument is one data set (indexed as a 2-D array).  A
    mixture model is fitted per data set, then every data set is projected
    to two dimensions and drawn in its own subplot with one ellipse per
    mixture component that has points assigned to it.

    Keyword arguments read:
      - em_gmm: if true, fit ``mixture.GMM`` with 4 components; otherwise
        fit ``mixture.DPGMM`` with 100 components.
      - clf_on_pca: if true, fit and predict on the 2-component PCA
        projection; otherwise on the raw data.

    The module-level flag ``apply_ica`` selects FastICA over PCA for the
    2-D projection used in the plots.  The function is interactive: it
    waits on ``raw_input`` after each fit and before each plot.  It exits
    the process with status -1 if the scientific packages cannot be
    imported.
    """
    # TODO plotting when the classifier is NOT learned in the PCA(2-best) space (ellipses are wrong)
    ### Clustering
    try:
        from sklearn import mixture
        from scipy import linalg
        import pylab as pl
        import matplotlib as mpl
        from sklearn.decomposition import PCA, FastICA
    except ImportError:
        # Only a missing package is reported this way; any other error
        # (including Ctrl-C) propagates and is no longer masked.
        print("You need SciPy and scikit-learn")
        sys.exit(-1)

    models = []
    for arg in args:
        if apply_ica:
            for featurenb in range(len(arg[0])):
                if sum(arg[:, featurenb]) == 0.0:
                    # NOTE(review): this looks intended to drop the zero-sum
                    # column, but range(featurenb+1) still includes it, so
                    # every column is kept.  Left as-is because the plotting
                    # code below uses the untouched data from args.
                    arg = arg[:, range(featurenb+1)+range(featurenb+1, len(arg[0]))]
        if kwargs.get('em_gmm', False):
            dpgmm = mixture.GMM(n_components = 4, cvtype='full')
        else:
            dpgmm = mixture.DPGMM(n_components = 100, cvtype='full', alpha=1000.0)
        if kwargs.get('clf_on_pca', False):
            pca = PCA(2)
            dpgmm.fit(pca.fit(arg).transform(arg))
        else:
            dpgmm.fit(arg)
        print(dpgmm)
        # Keep an independent copy of each fitted model.
        models.append(copy.deepcopy(dpgmm))
        print(raw_input("press any key to pass"))

    ### Plotting
    color_iter = itertools.cycle (['r', 'g', 'b', 'c', 'm'])
    for i, (clf, data) in enumerate(zip(models, args)):
        # Project the data to 2-D for display.
        if apply_ica:
            ica = FastICA(2)
            X_r = ica.fit(data).transform(data)
            print(ica.get_mixing_matrix())
        else:
            pca = PCA(2)
            X_r = pca.fit(data).transform(data)
        print(data)
        print(X_r)
        print(raw_input("press any key to pass"))
        splot = pl.subplot((len(args)+1)/ 2, 2, 1+i)
        pl.scatter(X_r[:,0], X_r[:,1])
        if kwargs.get('clf_on_pca', False):
            Y_ = clf.predict(X_r)
        else:
            Y_ = clf.predict(data)
        # The component index gets its own name so that it does not
        # overwrite the data-set index ``i`` of the outer loop.
        for comp, (mean, covar, color) in enumerate(zip(clf.means, clf.covars,
                                                        color_iter)):
            v, w = linalg.eigh(covar)
            u = w[0] / linalg.norm(w[0])
            # as the DP will not use every component it has access to
            # unless it needs it, we shouldn't plot the redundant
            # components.
            if not np.any(Y_ == comp):
                continue
            #pl.scatter(data[Y_== i, 0], data[Y_== i, 1], .8, color=color)
            pl.scatter(X_r[Y_== comp, 0], X_r[Y_== comp, 1], .8, color=color)
            # Plot an ellipse to show the Gaussian component
            angle = np.arctan(u[1]/u[0])
            angle = 180 * angle / np.pi # convert to degrees
            ell = mpl.patches.Ellipse(mean, v[0], v[1], 180 + angle, color=color)
            ell.set_clip_box(splot.bbox)
            ell.set_alpha(0.5)
            splot.add_artist(ell)
        pl.xlim(X_r[:, 0].min(), X_r[:, 0].max())
        pl.ylim(X_r[:, 1].min(), X_r[:, 1].max())
        pl.xticks(())
        pl.yticks(())
        pl.title("Dirichlet process GMM")
    pl.show()
Beispiel #18
0
def test_fastica(add_noise=False):
    """ Test the FastICA algorithm on very simple data.

    Two known sources are mixed with a fixed 2x2 matrix.  ``fastica`` is run
    for every combination of algorithm, non-linearity and whitening mode,
    and each estimate is compared with its original source.  With
    ``add_noise`` set, Gaussian noise is added to the mixture and the
    comparison tolerance is loosened.
    """
    rng = np.random.RandomState(0)
    # scipy.stats uses the global RNG, so it has to be seeded too; without
    # this the Student-t source below differs between runs.
    np.random.seed(0)
    n_samples = 1000
    # Generate two sources:
    s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1
    s2 = stats.t.rvs(1, size=n_samples)
    s = np.c_[s1, s2].T
    center_and_norm(s)
    s1, s2 = s

    # Mixing angle
    phi = 0.6
    mixing = np.array([[np.cos(phi), np.sin(phi)], [np.sin(phi),
                                                    -np.cos(phi)]])
    m = np.dot(mixing, s)

    if add_noise:
        m += 0.1 * rng.randn(2, 1000)

    center_and_norm(m)

    algos = ['parallel', 'deflation']
    nls = ['logcosh', 'exp', 'cube']
    whitening = [True, False]
    for algo, nl, whiten in itertools.product(algos, nls, whitening):
        if whiten:
            k_, mixing_, s_ = fastica(m.T, fun=nl, algorithm=algo)
        else:
            # Whiten up front with PCA and tell fastica not to do it again.
            X = PCA(n_components=2, whiten=True).fit_transform(m.T)
            k_, mixing_, s_ = fastica(X, fun=nl, algorithm=algo, whiten=False)
        s_ = s_.T
        # Check that the mixing model described in the docstring holds:
        if whiten:
            assert_almost_equal(s_, np.dot(np.dot(mixing_, k_), m))

        center_and_norm(s_)
        s1_, s2_ = s_
        # Check to see if the sources have been estimated
        # in the wrong order
        if abs(np.dot(s1_, s2)) > abs(np.dot(s1_, s1)):
            s2_, s1_ = s_
        # Resolve the sign ambiguity of each estimate.
        s1_ *= np.sign(np.dot(s1_, s1))
        s2_ *= np.sign(np.dot(s2_, s2))

        # Check that we have estimated the original sources
        if not add_noise:
            assert_almost_equal(np.dot(s1_, s1) / n_samples, 1, decimal=2)
            assert_almost_equal(np.dot(s2_, s2) / n_samples, 1, decimal=2)
        else:
            assert_almost_equal(np.dot(s1_, s1) / n_samples, 1, decimal=1)
            assert_almost_equal(np.dot(s2_, s2) / n_samples, 1, decimal=1)

    # Test FastICA class (nl and algo keep their last loop values)
    ica = FastICA(fun=nl, algorithm=algo, random_state=0)
    ica.fit(m.T)
    ica.get_mixing_matrix()
    assert_true(ica.components_.shape == (2, 2))
    assert_true(ica.sources_.shape == (1000, 2))
Beispiel #19
0
# Build two noisy source signals sampled on a common time axis.
np.random.seed(0)
n_samples = 2000
time = np.linspace(0, 10, n_samples)
s1 = np.sin(2 * time)  # sinusoidal source
s2 = np.sign(np.sin(3 * time))  # square-wave source
S = np.column_stack((s1, s2))
S += 0.2 * np.random.normal(size=S.shape)  # additive Gaussian noise

# Standardize each source column.
S /= S.std(axis=0)

# Mix the sources to obtain the observations.
A = np.array([[1, 1], [0.5, 2]])
X = S.dot(A.T)

# Fit ICA, then recover the sources and the estimated mixing matrix.
ica = FastICA()
ica.fit(X)
S_ = ica.transform(X)
A_ = ica.get_mixing_matrix()
# The estimated sources times the mixing matrix must reproduce X.
assert np.allclose(X, np.dot(S_, A_.T))

###############################################################################
# Plot results: true sources, observations and estimates, stacked vertically.
pl.figure()

pl.subplot(3, 1, 1)
pl.plot(S)
pl.title('True Sources')

pl.subplot(3, 1, 2)
pl.plot(X)
pl.title('Observations (mixed signal)')

pl.subplot(3, 1, 3)
pl.plot(S_)
pl.title('ICA estimated sources')

pl.subplots_adjust(0.09, 0.04, 0.94, 0.94, 0.26, 0.36)
            # pl.quiver(x_axis, y_axis, x_axis, y_axis, zorder=11, width=0.01,
            pl.quiver(0, 0, x_axis, y_axis, zorder=11, width=0.01, scale=6,
                      color=color)

    pl.hlines(0, -3, 3)
    pl.vlines(0, -3, 3)
    pl.xlim(-3, 3)
    pl.ylim(-3, 3)
    pl.xlabel('x')
    pl.ylabel('y')

# Draw a 2x2 grid of scatter panels using plot_samples (defined above).

# Panel 1: the sources S, divided by their overall standard deviation.
pl.subplot(2, 2, 1)
plot_samples(S / S.std())
pl.title('True Independent Sources')

# Panel 2: the observations X with two sets of axes overlaid: the transposed
# PCA components and the ICA mixing matrix. The legend labels follow the
# order of axis_list.
axis_list = [pca.components_.T, ica.get_mixing_matrix()]
pl.subplot(2, 2, 2)
plot_samples(X / np.std(X), axis_list=axis_list)
pl.legend(['PCA', 'ICA'], loc='upper left')
pl.title('Observations')

# Panel 3: PCA scores, scaled per column (axis=0).
pl.subplot(2, 2, 3)
plot_samples(S_pca_ / np.std(S_pca_, axis=0))
pl.title('PCA scores')

# Panel 4: ICA estimates, scaled by a single overall standard deviation.
pl.subplot(2, 2, 4)
plot_samples(S_ica_ / np.std(S_ica_))
pl.title('ICA estimated sources')

# Positional arguments are left, bottom, right, top, wspace, hspace.
pl.subplots_adjust(0.09, 0.04, 0.94, 0.94, 0.26, 0.26)
Beispiel #21
0
def test_fastica(add_noise=False):
    """ Test the FastICA algorithm on very simple data.

    Mixes two known sources with a fixed 2x2 matrix, runs ``fastica`` for
    every combination of algorithm, non-linearity and whitening mode, and
    checks each estimate against its original source.  With ``add_noise``
    set, Gaussian noise is added to the mixture and the comparison
    tolerance is loosened.
    """
    rng = np.random.RandomState(0)
    # scipy.stats uses the global RNG, so seed it as well; otherwise the
    # Student-t source below is different on every run.
    np.random.seed(0)
    n_samples = 1000
    # Generate two sources:
    s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1
    s2 = stats.t.rvs(1, size=n_samples)
    s = np.c_[s1, s2].T
    center_and_norm(s)
    s1, s2 = s

    # Mixing angle
    phi = 0.6
    mixing = np.array([[np.cos(phi),  np.sin(phi)],
                       [np.sin(phi), -np.cos(phi)]])
    m = np.dot(mixing, s)

    if add_noise:
        m += 0.1 * rng.randn(2, 1000)

    center_and_norm(m)

    algos = ['parallel', 'deflation']
    nls = ['logcosh', 'exp', 'cube']
    whitening = [True, False]
    for algo, nl, whiten in itertools.product(algos, nls, whitening):
        if whiten:
            k_, mixing_, s_ = fastica(m.T, fun=nl, algorithm=algo)
        else:
            # Whiten up front with PCA and tell fastica not to do it again.
            X = PCA(n_components=2, whiten=True).fit_transform(m.T)
            k_, mixing_, s_ = fastica(X, fun=nl, algorithm=algo,
                                     whiten=False)
        s_ = s_.T
        # Check that the mixing model described in the docstring holds:
        if whiten:
            assert_almost_equal(s_, np.dot(np.dot(mixing_, k_), m))

        center_and_norm(s_)
        s1_, s2_ = s_
        # Check to see if the sources have been estimated
        # in the wrong order
        if abs(np.dot(s1_, s2)) > abs(np.dot(s1_, s1)):
            s2_, s1_ = s_
        # Resolve the sign ambiguity of each estimate.
        s1_ *= np.sign(np.dot(s1_, s1))
        s2_ *= np.sign(np.dot(s2_, s2))

        # Check that we have estimated the original sources
        if not add_noise:
            assert_almost_equal(np.dot(s1_, s1) / n_samples, 1, decimal=2)
            assert_almost_equal(np.dot(s2_, s2) / n_samples, 1, decimal=2)
        else:
            assert_almost_equal(np.dot(s1_, s1) / n_samples, 1, decimal=1)
            assert_almost_equal(np.dot(s2_, s2) / n_samples, 1, decimal=1)

    # Test FastICA class (nl and algo keep their last loop values)
    ica = FastICA(fun=nl, algorithm=algo, random_state=0)
    ica.fit(m.T)
    ica.get_mixing_matrix()
    assert_true(ica.components_.shape == (2, 2))
    assert_true(ica.sources_.shape == (1000, 2))
def main(addNoise = 0, savedir = None, doFastICA = False):
    """Demonstrate 2-D ICA as a search for the least Gaussian rotation.

    Builds two standardized sources, mixes them with a fixed 2x2 matrix,
    whitens the mixture and then un-mixes it, either by minimising an
    objective over a rotation angle or with scikit-learn's FastICA.  Six
    figures are produced and diagnostic values are printed along the way.

    Parameters:
      addNoise  -- if > 0, passed as the second argument of random.normal to
                   add noise to the mixed signal.
      savedir   -- if truthy, figures 1-6 are saved there as figure_NN.png;
                   otherwise an ipdb breakpoint is entered at the end.
      doFastICA -- if true, use FastICA (whiten=False) on the whitened data
                   in place of the fmin_bfgs angle search.
    """
    N = 200
    tt = linspace(0, 10, N)

    # make sources
    s1 = 4 + cos(tt*5)
    s2 = tt % 2

    # standardize both sources (zero mean, unit standard deviation)
    s1 -= mean(s1)
    s1 /= std(s1)
    s2 -= mean(s2)
    s2 /= std(s2)

    # Figure 1: sources (top two rows) and observed signals (bottom two rows)
    pyplot.figure(1)
    pyplot.subplot(4,1,1)
    pyplot.title('original sources')
    pyplot.plot(tt, s1, 'bo-')
    pyplot.subplot(4,1,2)
    pyplot.plot(tt, s2, 'bo-')

    # fixed mixing matrix
    A = array([[3, 1], [-2, .3]])

    # one source per column
    S = vstack((s1, s2)).T
    #print 'S', S
    print 'kurt(s1) =', kurt(s1)
    print 'kurt(s2) =', kurt(s2)
    print ' negentropy(s1) =', negentropy(s1)
    print ' negentropy(s2) =', negentropy(s2)
    print ' logcosh10(s1) =', logcosh10(s1)
    print ' logcosh10(s2) =', logcosh10(s2)
    print ' logcosh15(s1) =', logcosh15(s1)
    print ' logcosh15(s2) =', logcosh15(s2)
    print ' logcosh20(s1) =', logcosh20(s1)
    print ' logcosh20(s2) =', logcosh20(s2)
    print ' negexp(s1) =', negexp(s1)
    print ' negexp(s2) =', negexp(s2)
    
    # observed (mixed) signal
    X = dot(S, A)

    if addNoise > 0:
        print 'Adding noise!'
        X += random.normal(0, addNoise, X.shape)
    
    #print 'X', X

    x1 = X[:,0]
    x2 = X[:,1]

    #print 'kurt(x1) =', kurt(x1)
    #print 'kurt(x2) =', kurt(x2)

    pyplot.subplot(4,1,3)
    pyplot.title('observed signal')
    pyplot.plot(tt, x1, 'ro-')
    pyplot.subplot(4,1,4)
    pyplot.plot(tt, x2, 'ro-')

    # Figure 2: histograms of the sources and of the observed signals
    pyplot.figure(2)
    pyplot.subplot(4,1,1)
    pyplot.title('original sources')
    pyplot.hist(s1)
    pyplot.subplot(4,1,2)
    pyplot.hist(s2)
    pyplot.subplot(4,1,3)
    pyplot.title('observed signal')
    pyplot.hist(x1)
    pyplot.subplot(4,1,4)
    pyplot.hist(x2)

    # NOTE(review): PCA here takes the data in its constructor and offers
    # toZca/toWhitePC, so it is presumably a project-local class, not
    # sklearn's -- confirm against the imports.
    pca = PCA(X)

    #W = pca.toWhitePC(X)
    W = pca.toZca(X)

    w1 = W[:,0]
    w2 = W[:,1]

    print 'kurt(w1) =', kurt(w1)
    print 'kurt(w2) =', kurt(w2)

    # Figure 3: histograms and scatter plots before and after whitening
    pyplot.figure(3)
    pyplot.subplot(4,2,1)
    pyplot.title('observed signal')
    pyplot.hist(x1)
    pyplot.subplot(4,2,3)
    pyplot.hist(x2)
    pyplot.subplot(2,2,2)
    pyplot.plot(x1, x2, 'bo')

    pyplot.subplot(4,2,5)
    pyplot.title('whitened observed signal')
    pyplot.hist(w1)
    pyplot.subplot(4,2,7)
    pyplot.hist(w2)
    pyplot.subplot(2,2,4)
    pyplot.plot(w1, w2, 'bo')

    # Compute kurtosis at different angles
    # (kurt1 is only referenced by the commented-out plot call for figure 4)
    thetas = linspace(0, pi, 100)
    kurt1 = 0 * thetas
    for ii, theta in enumerate(thetas):
        kurt1[ii] = kurt(dot(rotMat(theta)[0,:], W.T).T)


    # functions of data
    # each measure is negated so that a minimiser maximises the measure
    minfnK    = lambda data: -kurt(data)**2
    minfnNEnt = lambda data: -negentropy(data)
    minfnLC10 = lambda data: -logcosh10(data)
    minfnLC15 = lambda data: -logcosh15(data)
    minfnLC20 = lambda data: -logcosh20(data)
    minfnNExp = lambda data: -negexp(data)

    # functions of the rotation angle, given W as the data
    # (W is projected onto the first row of rotMat(theta))
    minAngleFnK    = lambda theta: minfnK(dot(rotMat(theta)[0,:], W.T).T)
    minAngleFnNEnt = lambda theta: minfnNEnt(dot(rotMat(theta)[0,:], W.T).T)
    minAngleFnLC10 = lambda theta: minfnLC10(dot(rotMat(theta)[0,:], W.T).T)
    minAngleFnLC15 = lambda theta: minfnLC15(dot(rotMat(theta)[0,:], W.T).T)
    minAngleFnLC20 = lambda theta: minfnLC20(dot(rotMat(theta)[0,:], W.T).T)
    minAngleFnNExp = lambda theta: minfnNExp(dot(rotMat(theta)[0,:], W.T).T)

    #########
    # Chosen objective function. Change this line to change which objective is used.
    #########
    minDataFn = minfnK 

    minAngleFn = lambda theta: minDataFn(dot(rotMat(theta)[0,:], W.T).T)

    if doFastICA:
        # Use FastICA from sklearn
        #pdb.set_trace()
        from sklearn.decomposition import FastICA
        rng = random.RandomState(1)
        # W is already whitened, so FastICA's own whitening is switched off
        ica = FastICA(random_state = rng, whiten = False)
        ica.fit(W)
        Recon = ica.transform(W)  # Estimate the sources
        #S_fica /= S_fica.std(axis=0)   # (should already be done)
        Ropt = ica.get_mixing_matrix()
    else:
        # Manually fit angle using fmin_bfgs
        angle0 = 0
        xopt = fmin_bfgs(minAngleFn, angle0)
        # fold the optimum into [0, pi)
        xopt = xopt[0] % pi
        Ropt = rotMat(xopt)
        Recon = dot(W, Ropt.T)

    # objective value at every sampled angle
    mnval = array([minAngleFn(aa) for aa in thetas])

    # Figure 4: chosen objective as a function of the rotation angle
    pyplot.figure(4)
    pyplot.title('objective vs. angle')
    #pyplot.plot(thetas, kurt1, 'bo-', thetas, mnval, 'k', xopt, minAngleFn(xopt), 'ko')
    pyplot.plot(thetas, mnval, 'b')
    if not doFastICA:
        # xopt only exists on the fmin_bfgs path, so mark it only there
        pyplot.hold(True)
        pyplot.plot(xopt, minAngleFn(xopt), 'ko')

    # Figure 5: every candidate objective as a function of the angle
    pyplot.figure(5)
    pyplot.title('different gaussianness measures vs. angle')
    pyplot.subplot(6,1,1); pyplot.title('Kurt'); pyplot.plot(thetas, array([minAngleFnK(aa) for aa in thetas]))
    pyplot.subplot(6,1,2); pyplot.title('NegEnt'); pyplot.plot(thetas, array([minAngleFnNEnt(aa) for aa in thetas]))
    pyplot.subplot(6,1,3); pyplot.title('LogCosh10'); pyplot.plot(thetas, array([minAngleFnLC10(aa) for aa in thetas]))
    pyplot.subplot(6,1,4); pyplot.title('LogCosh15'); pyplot.plot(thetas, array([minAngleFnLC15(aa) for aa in thetas]))
    pyplot.subplot(6,1,5); pyplot.title('LogCosh20'); pyplot.plot(thetas, array([minAngleFnLC20(aa) for aa in thetas]))
    pyplot.subplot(6,1,6); pyplot.title('NegExp'); pyplot.plot(thetas, array([minAngleFnNExp(aa) for aa in thetas]))
    
    print 'kurt(r1) =', kurt(Recon[:,0])
    print 'kurt(r2) =', kurt(Recon[:,1])

    print
    print 'objective(s1) =', minDataFn(s1)
    print 'objective(s2) =', minDataFn(s2)
    print 'objective(w1) =', minDataFn(w1)
    print 'objective(w2) =', minDataFn(w2)
    print 'objective(r1) =', minDataFn(Recon[:,0])
    print 'objective(r2) =', minDataFn(Recon[:,1])
    # the trailing comma keeps the next print on the same line
    print 'optimal theta:',
    if doFastICA:
        print '<not computed with FastICA>'
    else:
        print xopt, '(+pi/2 =', (xopt+pi/2)%pi, ')'
    print 'Optimal rotation matrix:\n', Ropt

    # Figure 6: original sources next to the reconstructed ones
    pyplot.figure(6)
    pyplot.subplot(4,1,1)
    pyplot.title('original sources')
    pyplot.plot(tt, s1, 'bo-')
    pyplot.subplot(4,1,2)
    pyplot.plot(tt, s2, 'bo-')
    pyplot.subplot(4,1,3)
    pyplot.title('reconstructed sources')
    pyplot.plot(tt, Recon[:,0], 'go-')
    pyplot.subplot(4,1,4)
    pyplot.plot(tt, Recon[:,1], 'go-')

    #pyplot.show()

    if savedir:
        # save figures 1..6 as figure_01.png .. figure_06.png
        figname = lambda ii : os.path.join(savedir, 'figure_%02d.png' % ii)
        for ii in range(6):
            pyplot.figure(ii+1)
            pyplot.savefig(figname(ii+1))
        print 'plots saved in', savedir
    else:
        # no output directory: stop in the ipdb debugger
        import ipdb; ipdb.set_trace()
Beispiel #23
0
def main(addNoise=0, savedir=None, doFastICA=False):
    """Demonstrate 2-D ICA as a search for the least Gaussian rotation.

    Builds two standardized sources, mixes them with a fixed 2x2 matrix,
    whitens the mixture and then un-mixes it, either by minimising an
    objective over a rotation angle or with scikit-learn's FastICA.  Six
    figures are produced and diagnostic values are printed along the way.

    Parameters:
      addNoise  -- if > 0, passed as the second argument of random.normal to
                   add noise to the mixed signal.
      savedir   -- if truthy, figures 1-6 are saved there as figure_NN.png;
                   otherwise an ipdb breakpoint is entered at the end.
      doFastICA -- if true, use FastICA (whiten=False) on the whitened data
                   in place of the fmin_bfgs angle search.
    """
    N = 200
    tt = linspace(0, 10, N)

    # make sources
    s1 = 4 + cos(tt * 5)
    s2 = tt % 2

    # standardize both sources (zero mean, unit standard deviation)
    s1 -= mean(s1)
    s1 /= std(s1)
    s2 -= mean(s2)
    s2 /= std(s2)

    # Figure 1: sources (top two rows) and observed signals (bottom two rows)
    pyplot.figure(1)
    pyplot.subplot(4, 1, 1)
    pyplot.title('original sources')
    pyplot.plot(tt, s1, 'bo-')
    pyplot.subplot(4, 1, 2)
    pyplot.plot(tt, s2, 'bo-')

    # fixed mixing matrix
    A = array([[3, 1], [-2, .3]])

    # one source per column
    S = vstack((s1, s2)).T
    #print 'S', S
    print 'kurt(s1) =', kurt(s1)
    print 'kurt(s2) =', kurt(s2)
    print ' negentropy(s1) =', negentropy(s1)
    print ' negentropy(s2) =', negentropy(s2)
    print ' logcosh10(s1) =', logcosh10(s1)
    print ' logcosh10(s2) =', logcosh10(s2)
    print ' logcosh15(s1) =', logcosh15(s1)
    print ' logcosh15(s2) =', logcosh15(s2)
    print ' logcosh20(s1) =', logcosh20(s1)
    print ' logcosh20(s2) =', logcosh20(s2)
    print ' negexp(s1) =', negexp(s1)
    print ' negexp(s2) =', negexp(s2)

    # observed (mixed) signal
    X = dot(S, A)

    if addNoise > 0:
        print 'Adding noise!'
        X += random.normal(0, addNoise, X.shape)

    #print 'X', X

    x1 = X[:, 0]
    x2 = X[:, 1]

    #print 'kurt(x1) =', kurt(x1)
    #print 'kurt(x2) =', kurt(x2)

    pyplot.subplot(4, 1, 3)
    pyplot.title('observed signal')
    pyplot.plot(tt, x1, 'ro-')
    pyplot.subplot(4, 1, 4)
    pyplot.plot(tt, x2, 'ro-')

    # Figure 2: histograms of the sources and of the observed signals
    pyplot.figure(2)
    pyplot.subplot(4, 1, 1)
    pyplot.title('original sources')
    pyplot.hist(s1)
    pyplot.subplot(4, 1, 2)
    pyplot.hist(s2)
    pyplot.subplot(4, 1, 3)
    pyplot.title('observed signal')
    pyplot.hist(x1)
    pyplot.subplot(4, 1, 4)
    pyplot.hist(x2)

    # NOTE(review): PCA here takes the data in its constructor and offers
    # toZca/toWhitePC, so it is presumably a project-local class, not
    # sklearn's -- confirm against the imports.
    pca = PCA(X)

    #W = pca.toWhitePC(X)
    W = pca.toZca(X)

    w1 = W[:, 0]
    w2 = W[:, 1]

    print 'kurt(w1) =', kurt(w1)
    print 'kurt(w2) =', kurt(w2)

    # Figure 3: histograms and scatter plots before and after whitening
    pyplot.figure(3)
    pyplot.subplot(4, 2, 1)
    pyplot.title('observed signal')
    pyplot.hist(x1)
    pyplot.subplot(4, 2, 3)
    pyplot.hist(x2)
    pyplot.subplot(2, 2, 2)
    pyplot.plot(x1, x2, 'bo')

    pyplot.subplot(4, 2, 5)
    pyplot.title('whitened observed signal')
    pyplot.hist(w1)
    pyplot.subplot(4, 2, 7)
    pyplot.hist(w2)
    pyplot.subplot(2, 2, 4)
    pyplot.plot(w1, w2, 'bo')

    # Compute kurtosis at different angles
    # (kurt1 is only referenced by the commented-out plot call for figure 4)
    thetas = linspace(0, pi, 100)
    kurt1 = 0 * thetas
    for ii, theta in enumerate(thetas):
        kurt1[ii] = kurt(dot(rotMat(theta)[0, :], W.T).T)

    # functions of data
    # each measure is negated so that a minimiser maximises the measure
    minfnK = lambda data: -kurt(data)**2
    minfnNEnt = lambda data: -negentropy(data)
    minfnLC10 = lambda data: -logcosh10(data)
    minfnLC15 = lambda data: -logcosh15(data)
    minfnLC20 = lambda data: -logcosh20(data)
    minfnNExp = lambda data: -negexp(data)

    # functions of the rotation angle, given W as the data
    # (W is projected onto the first row of rotMat(theta))
    minAngleFnK = lambda theta: minfnK(dot(rotMat(theta)[0, :], W.T).T)
    minAngleFnNEnt = lambda theta: minfnNEnt(dot(rotMat(theta)[0, :], W.T).T)
    minAngleFnLC10 = lambda theta: minfnLC10(dot(rotMat(theta)[0, :], W.T).T)
    minAngleFnLC15 = lambda theta: minfnLC15(dot(rotMat(theta)[0, :], W.T).T)
    minAngleFnLC20 = lambda theta: minfnLC20(dot(rotMat(theta)[0, :], W.T).T)
    minAngleFnNExp = lambda theta: minfnNExp(dot(rotMat(theta)[0, :], W.T).T)

    #########
    # Chosen objective function. Change this line to change which objective is used.
    #########
    minDataFn = minfnK

    minAngleFn = lambda theta: minDataFn(dot(rotMat(theta)[0, :], W.T).T)

    if doFastICA:
        # Use FastICA from sklearn
        #pdb.set_trace()
        from sklearn.decomposition import FastICA
        rng = random.RandomState(1)
        # W is already whitened, so FastICA's own whitening is switched off
        ica = FastICA(random_state=rng, whiten=False)
        ica.fit(W)
        Recon = ica.transform(W)  # Estimate the sources
        #S_fica /= S_fica.std(axis=0)   # (should already be done)
        Ropt = ica.get_mixing_matrix()
    else:
        # Manually fit angle using fmin_bfgs
        angle0 = 0
        xopt = fmin_bfgs(minAngleFn, angle0)
        # fold the optimum into [0, pi)
        xopt = xopt[0] % pi
        Ropt = rotMat(xopt)
        Recon = dot(W, Ropt.T)

    # objective value at every sampled angle
    mnval = array([minAngleFn(aa) for aa in thetas])

    # Figure 4: chosen objective as a function of the rotation angle
    pyplot.figure(4)
    pyplot.title('objective vs. angle')
    #pyplot.plot(thetas, kurt1, 'bo-', thetas, mnval, 'k', xopt, minAngleFn(xopt), 'ko')
    pyplot.plot(thetas, mnval, 'b')
    if not doFastICA:
        # xopt only exists on the fmin_bfgs path, so mark it only there
        pyplot.hold(True)
        pyplot.plot(xopt, minAngleFn(xopt), 'ko')

    # Figure 5: every candidate objective as a function of the angle
    pyplot.figure(5)
    pyplot.title('different gaussianness measures vs. angle')
    pyplot.subplot(6, 1, 1)
    pyplot.title('Kurt')
    pyplot.plot(thetas, array([minAngleFnK(aa) for aa in thetas]))
    pyplot.subplot(6, 1, 2)
    pyplot.title('NegEnt')
    pyplot.plot(thetas, array([minAngleFnNEnt(aa) for aa in thetas]))
    pyplot.subplot(6, 1, 3)
    pyplot.title('LogCosh10')
    pyplot.plot(thetas, array([minAngleFnLC10(aa) for aa in thetas]))
    pyplot.subplot(6, 1, 4)
    pyplot.title('LogCosh15')
    pyplot.plot(thetas, array([minAngleFnLC15(aa) for aa in thetas]))
    pyplot.subplot(6, 1, 5)
    pyplot.title('LogCosh20')
    pyplot.plot(thetas, array([minAngleFnLC20(aa) for aa in thetas]))
    pyplot.subplot(6, 1, 6)
    pyplot.title('NegExp')
    pyplot.plot(thetas, array([minAngleFnNExp(aa) for aa in thetas]))

    print 'kurt(r1) =', kurt(Recon[:, 0])
    print 'kurt(r2) =', kurt(Recon[:, 1])

    print
    print 'objective(s1) =', minDataFn(s1)
    print 'objective(s2) =', minDataFn(s2)
    print 'objective(w1) =', minDataFn(w1)
    print 'objective(w2) =', minDataFn(w2)
    print 'objective(r1) =', minDataFn(Recon[:, 0])
    print 'objective(r2) =', minDataFn(Recon[:, 1])
    # the trailing comma keeps the next print on the same line
    print 'optimal theta:',
    if doFastICA:
        print '<not computed with FastICA>'
    else:
        print xopt, '(+pi/2 =', (xopt + pi / 2) % pi, ')'
    print 'Optimal rotation matrix:\n', Ropt

    # Figure 6: original sources next to the reconstructed ones
    pyplot.figure(6)
    pyplot.subplot(4, 1, 1)
    pyplot.title('original sources')
    pyplot.plot(tt, s1, 'bo-')
    pyplot.subplot(4, 1, 2)
    pyplot.plot(tt, s2, 'bo-')
    pyplot.subplot(4, 1, 3)
    pyplot.title('reconstructed sources')
    pyplot.plot(tt, Recon[:, 0], 'go-')
    pyplot.subplot(4, 1, 4)
    pyplot.plot(tt, Recon[:, 1], 'go-')

    #pyplot.show()

    if savedir:
        # save figures 1..6 as figure_01.png .. figure_06.png
        figname = lambda ii: os.path.join(savedir, 'figure_%02d.png' % ii)
        for ii in range(6):
            pyplot.figure(ii + 1)
            pyplot.savefig(figname(ii + 1))
        print 'plots saved in', savedir
    else:
        # no output directory: stop in the ipdb debugger
        import ipdb
        ipdb.set_trace()