import numpy as np
from numpy.testing import assert_almost_equal
from sklearn.datasets import make_blobs
from sklearn.discriminant_analysis import _cov


def test_covariance():
    x, y = make_blobs(n_samples=100, n_features=5, centers=1, random_state=42)

    # make features correlated
    x = np.dot(x, np.arange(x.shape[1]**2).reshape(x.shape[1], x.shape[1]))

    c_e = _cov(x, 'empirical')
    assert_almost_equal(c_e, c_e.T)

    c_s = _cov(x, 'auto')
    assert_almost_equal(c_s, c_s.T)
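For reference, here is a minimal standalone sketch of what the two shrinkage modes tested above compute: 'empirical' is the maximum-likelihood (biased) sample covariance, while 'auto' applies Ledoit-Wolf shrinkage. The data are generated the same way as in the test.

import numpy as np
from sklearn.datasets import make_blobs
from sklearn.discriminant_analysis import _cov

X, _ = make_blobs(n_samples=100, n_features=5, centers=1, random_state=42)

c_empirical = _cov(X, 'empirical')
c_shrunk = _cov(X, 'auto')

# The 'empirical' estimate matches the biased sample covariance ...
assert np.allclose(c_empirical, np.cov(X, rowvar=False, bias=True))
# ... while Ledoit-Wolf shrinkage pulls the estimate towards a scaled
# identity, which typically improves its conditioning.
print(np.linalg.cond(c_empirical), np.linalg.cond(c_shrunk))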
    def fit(self, X, y):
        if not np.all(X.index == y.index):
            warnings.warn(
                "Indexes in X and y are different. Are you sure they are "
                "correctly aligned?"
            )

        self.X = X
        self.y = y

        sample_ids = X.index
        feature_ids = X.columns
        # convert to plain NumPy arrays for scikit-learn
        X = X.to_numpy()
        y = y.to_numpy()

        nuee_LDA = sklearn_LDA(solver=self.solver,
                               shrinkage=self.shrinkage,
                               priors=self.priors,
                               n_components=self.n_components,
                               store_covariance=self.store_covariance,
                               tol=self.tol)

        nuee_LDA.fit(X, y)
        ordi_column_names = [
            'LDA%d' % (i + 1) for i in range(nuee_LDA.coef_.shape[1])
        ]

        # prepare output
        ## Compute eigenvalues. sklearn doesn't expose them,
        ## so they are computed here from the scatter matrices
        Sw = nuee_LDA.covariance_
        St = _cov(X, nuee_LDA.shrinkage)
        Sb = St - Sw  # between scatter
        eigenvalues, _ = linalg.eigh(Sb, Sw)
        eigenvalues = eigenvalues[::-1]
        p_explained = pd.Series(
            nuee_LDA.explained_variance_ratio_,
            index=ordi_column_names[:len(nuee_LDA.explained_variance_ratio_)])

        sample_scores = nuee_LDA.transform(X)
        biplot_scores = nuee_LDA.scalings_
        if self.scaling == 2:
            sample_scores = sample_scores.dot(
                np.diag(eigenvalues[:sample_scores.shape[1]]**(-0.5)))
            biplot_scores = biplot_scores.dot(
                np.diag(eigenvalues[:biplot_scores.shape[1]]**0.5))

        # Add LDA ordination object attributes to self
        self.ordiobject_type = 'LDA'
        self.method_name = 'Linear Discriminant Analysis'
        self.ordi_fitted = nuee_LDA
        self.eigenvalues = eigenvalues
        self.proportion_explained = p_explained
        self.sample_scores = pd.DataFrame(
            sample_scores,
            index=sample_ids,
            columns=ordi_column_names[:sample_scores.shape[1]])
        self.sample_scores.index.name = 'ID'
        self.biplot_scores = pd.DataFrame(
            biplot_scores,
            index=feature_ids,
            columns=ordi_column_names[:biplot_scores.shape[1]])
        self.biplot_scores.index.name = 'ID'

        return self
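The eigenvalue block in the fit method above solves the generalized eigenproblem Sb v = lambda Sw v on the between- and within-class scatter matrices. A minimal standalone sketch of that step, using synthetic data and scikit-learn's public LinearDiscriminantAnalysis (which sklearn_LDA above presumably aliases):

import numpy as np
from scipy import linalg
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, _cov

# Synthetic data for illustration only: 60 samples, 4 features, 3 classes.
rng = np.random.RandomState(0)
X = rng.randn(60, 4)
y = np.repeat([0, 1, 2], 20)

lda = LinearDiscriminantAnalysis(store_covariance=True).fit(X, y)

Sw = lda.covariance_        # within-class scatter (weighted covariance)
St = _cov(X, 'empirical')   # total scatter
Sb = St - Sw                # between-class scatter

# eigh solves Sb v = lambda Sw v and returns eigenvalues in ascending order,
# so reverse them to get the largest first.
eigenvalues, _ = linalg.eigh(Sb, Sw)
eigenvalues = eigenvalues[::-1]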
Example #4
def imagery_time(folder,
                 subs,
                 filt,
                 im_times,
                 event_ids,
                 n_perm,
                 n_pseudo,
                 bins,
                 step=1):
    import numpy as np
    import mne
    from mne.time_frequency import tfr_morlet, psd_multitaper, psd_welch
    import os
    import scipy
    from sklearn.discriminant_analysis import _cov
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB

    svm = SVC(kernel='linear')
    # ShuffleBinLeaveOneOut (a custom cross-validation scheme) and
    # load_to_epochs are helper functions defined elsewhere in this project.
    CV = ShuffleBinLeaveOneOut
    out = list()

    for i, sub in enumerate(subs):
        fnames = [
            folder + '/IR_' + str(sub).zfill(2) + '_S01.bdf',
            folder + '/IR_' + str(sub).zfill(2) + '_S02.bdf'
        ]
        epochs = load_to_epochs(fnames, event_ids, im_times, filt)
        epochs.drop_channels(['Status']).equalize_event_counts(
            event_ids=event_ids, method='mintime')
        X = epochs.get_data()
        y = epochs.events[:, 2]
        y = [a - 30 for a in y]
        n_conditions = len(np.unique(y))
        n_sensors = X.shape[1]
        n_time = X.shape[2]
        cv = CV(y, n_iter=n_perm, n_pseudo=n_pseudo)
        result = np.full((n_perm, n_conditions, n_conditions, n_time), np.nan)

        for f, (train_indices, test_indices) in enumerate(cv.split(X)):
            print('\tPermutation %g / %g' % (f + 1, n_perm))

            # 1. Compute pseudo-trials for training and test
            Xpseudo_train = np.full((len(train_indices), n_sensors, n_time),
                                    np.nan)
            Xpseudo_test = np.full((len(test_indices), n_sensors, n_time),
                                   np.nan)
            for i, ind in enumerate(train_indices):
                Xpseudo_train[i, :, :] = np.mean(X[ind, :, :], axis=0)
            for i, ind in enumerate(test_indices):
                Xpseudo_test[i, :, :] = np.mean(X[ind, :, :], axis=0)

            # 2. Whitening using the Epoch method
            sigma_conditions = cv.labels_pseudo_train[0, :,
                                                      n_pseudo - 1:].flatten()
            sigma_ = np.empty((n_conditions, n_sensors, n_sensors))
            for k, c in enumerate(np.unique(y)):
                # compute sigma for each time point, then average across time
                sigma_[k] = np.mean(
                    [_cov(Xpseudo_train[sigma_conditions == c, :, t],
                          shrinkage='auto') for t in range(n_time)],
                    axis=0)
            sigma = sigma_.mean(axis=0)  # average across conditions
            sigma_inv = scipy.linalg.fractional_matrix_power(sigma, -0.5)
            Xpseudo_train = (
                Xpseudo_train.swapaxes(1, 2) @ sigma_inv).swapaxes(1, 2)
            Xpseudo_test = (Xpseudo_test.swapaxes(1, 2) @ sigma_inv).swapaxes(
                1, 2)

            for c1 in range(n_conditions - 1):
                for c2 in range(min(c1 + 1, n_conditions - 1), n_conditions):
                    for t in np.arange(0, n_time - bins, step):
                        # 3. Fit the classifier using training data
                        data_train = Xpseudo_train[cv.ind_pseudo_train[c1,
                                                                       c2], :,
                                                   t:t + bins]
                        data_train = np.reshape(
                            data_train,
                            (data_train.shape[0],
                             data_train.shape[1] * data_train.shape[2]),
                            order='F')
                        svm.fit(data_train, cv.labels_pseudo_train[c1, c2])

                        # 4. Compute and store classification accuracies
                        data_test = Xpseudo_test[cv.ind_pseudo_test[c1, c2], :,
                                                 t:t + bins]
                        data_test = np.reshape(
                            data_test,
                            (data_test.shape[0],
                             data_test.shape[1] * data_test.shape[2]),
                            order='F')
                        result[f, c1, c2, t] = np.mean(
                            svm.predict(data_test) == cv.labels_pseudo_test[
                                c1, c2]) - 0.5

        # collect this subject's results and save an intermediate copy
        out.append(result)
        np.savez_compressed('temp', results=out)
    return out
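Step 2 above (whitening with the 'Epoch method', i.e. multivariate noise normalisation) can be isolated into a short sketch. The arrays and labels below are synthetic stand-ins for the pseudo-trial data and cross-validation bookkeeping used in the function.

import numpy as np
import scipy.linalg
from sklearn.discriminant_analysis import _cov

# Synthetic epoched data: (n_trials, n_sensors, n_time).
rng = np.random.RandomState(0)
X = rng.randn(40, 8, 50)
y = np.repeat([0, 1], 20)

n_conditions = len(np.unique(y))
n_sensors, n_time = X.shape[1], X.shape[2]

# Covariance per condition: estimate at each time point with Ledoit-Wolf
# shrinkage ('auto'), then average over time and over conditions.
sigma_ = np.empty((n_conditions, n_sensors, n_sensors))
for k, c in enumerate(np.unique(y)):
    sigma_[k] = np.mean([_cov(X[y == c, :, t], shrinkage='auto')
                         for t in range(n_time)], axis=0)
sigma = sigma_.mean(axis=0)

# Whiten every time slice by multiplying with sigma^(-1/2).
sigma_inv = scipy.linalg.fractional_matrix_power(sigma, -0.5)
X_white = (X.swapaxes(1, 2) @ sigma_inv).swapaxes(1, 2)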
Example #5
    def generate_new_face(self,
                          N,
                          age,
                          gender,
                          ethn,
                          age_range=20,
                          algorithm='pca',
                          dist='norm',
                          whitened=False,
                          shrinkage=False,
                          save_dir=None):
        """ Generates a new face by randomly synthesizing PCA components,
        applying the inverse PCA transform, and adding the norm.

        Parameters
        ----------
        N : int
            How many new faces should be generated
        age : int
            Desired age of new face
        gender : str
            Desired gender of new face ('M' or 'F')
        ethn : str
            Desired ethnicity of new face ('WC', 'BA', 'EA')
        age_range : int
            Range around `age` used to select reference subjects
        algorithm : str
            Decomposition algorithm that was used ('pca', 'ica', or 'nmf')
        dist : str
            Distribution used to sample new values ('uniform', 'norm', 'mnorm')
        whitened : bool
            Was the data whitened before decomposition?
        shrinkage : bool
            Whether to apply shrinkage to covariance estimation of residuals.
            Only relevant when dist='mnorm'.
        save_dir : str
            Path to directory with (intermediate) results.
        """

        if save_dir is None:
            save_dir = self.save_dir

        to_write = {i: dict() for i in range(N)}
        print("")
        for mod in self.mods:
            print("Generating new faces (%s) ..." % mod)
            decomp_comps = np.load(
                op.join(save_dir, '%s_decomp_comps.npy' % mod))

            nz_mask = np.load(op.join(save_dir, '%s_nzmask.npy' % mod))
            betas = self._load_chunks(mod, save_dir, 'betas')
            resids_decomp = self._load_chunks(mod, save_dir,
                                              'residuals_decomp')
            relev_scodes = get_scodes_given_criteria(gender, age, age_range,
                                                     ethn, 'v1')
            idx = self._get_idx_of_scode(relev_scodes)
            relev_resids = resids_decomp[idx, :]
            random_data = np.zeros((N, decomp_comps.shape[0]))
            for i in range(N):  # this can probably be implemented faster ...
                if dist == 'uniform':
                    mins, maxs = relev_resids.min(axis=0), relev_resids.max(
                        axis=0)
                    random_data[i, :] = np.random.uniform(mins, maxs)
                elif dist == 'norm':
                    means, stds = relev_resids.mean(axis=0), relev_resids.std(
                        axis=0)
                    random_data[i, :] = np.random.normal(means, stds)
                elif dist == 'mnorm':
                    means = relev_resids.mean(axis=0)

                    if shrinkage:
                        cov = _cov(relev_resids, shrinkage='auto')
                    else:
                        cov = np.cov(relev_resids.T)

                    random_data[i, :] = np.random.multivariate_normal(
                        means, cov)
                else:
                    raise ValueError("Please choose `dist` from ('uniform', "
                                     "'norm', 'mnorm')")

            # Invert the decomposition to map the sampled components back to
            # residual space
            if algorithm == 'pca':
                decomp_means = np.load(
                    op.join(save_dir, '%s_decomp_means.npy' % mod))
                if whitened:
                    decomp_explvar = np.load(
                        op.join(save_dir, '%s_decomp_explvar.npy' % mod))
                    resids_inv = np.dot(
                        random_data,
                        np.sqrt(decomp_explvar[:, np.newaxis]) *
                        decomp_comps) + decomp_means
                else:
                    resids_inv = random_data.dot(decomp_comps) + decomp_means
            elif algorithm == 'ica':
                resids_inv = random_data.dot(decomp_comps.T)
                resid_means = np.load(
                    op.join(save_dir, '%s_residuals_means.npy' % mod))
                resid_stds = np.load(
                    op.join(save_dir, '%s_residuals_stds.npy' % mod))
                resids_inv *= resid_stds
                resids_inv += resid_means
            elif algorithm == 'nmf':
                resids_inv = random_data.dot(decomp_comps)
                resid_mins = np.load(
                    op.join(save_dir, '%s_residuals_mins.npy' % mod))
                resid_scale = np.load(
                    op.join(save_dir, '%s_residuals_scale.npy' % mod))
                resids_inv -= resid_mins
                resids_inv /= resid_scale

            norm_vec = self._generate_design_vector(gender, age, ethn)
            norm = norm_vec.dot(betas)
            final_face_data = norm + resids_inv
            for i in range(N):
                tmp = np.zeros(DATA_SHAPES[self.version][mod])
                tmp[nz_mask] = final_face_data[i, :]
                tmp = tmp.reshape(DATA_SHAPES[self.version][mod])
                to_write[i][mod] = tmp

        to_return = []
        for key, value in to_write.items():
            name = 'id-g%i_gen-%s_age-%i_eth-%s.mat' % (key, gender, age, ethn)
            outname = op.join(save_dir, name)
            savemat(outname, value)
            to_return.append(outname)

        return to_return
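Finally, a standalone sketch of the dist='mnorm' branch above, showing the two covariance estimates it switches between; the residual matrix here is a synthetic stand-in.

import numpy as np
from sklearn.discriminant_analysis import _cov

# Synthetic stand-in for the residual matrix: 30 subjects x 50 components.
rng = np.random.RandomState(0)
relev_resids = rng.randn(30, 50)

means = relev_resids.mean(axis=0)

# With fewer samples (30) than dimensions (50) the plain sample covariance
# is rank deficient, which is when the Ledoit-Wolf ('auto') shrinkage
# estimate used above is the safer choice.
cov_plain = np.cov(relev_resids.T)
cov_shrunk = _cov(relev_resids, shrinkage='auto')

new_sample = np.random.multivariate_normal(means, cov_shrunk)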