Exemple #1
0
    def fit(self, X, y=None):
        """Fit PoSCE to the given time series for each subject

        Parameters
        ----------
        X : list of n_subjects numpy.ndarray, shapes (n_samples, n_features)
            The input subjects time series. The number of samples may differ
            from one subject to another

        y : ignored
            Not used by this method. Presumably kept for scikit-learn API
            compatibility -- confirm against the enclosing class.

        Returns
        -------
        self : PopulationShrunkCovariance instance
            The object itself. Useful for chaining operations.

        Raises
        ------
        ValueError
            If ``prior_mean_type`` is neither "geometric" nor "empirical".
        """
        # Validate the configuration up front so that an unsupported value
        # fails before any covariance is estimated.
        if self.prior_mean_type not in ("geometric", "empirical"):
            raise ValueError('Allowed mean types are "geometric", "empirical"'
                             ', got type "{}"'.format(self.prior_mean_type))

        # compute covariances from timeseries
        self.cov_estimator_ = clone(self.cov_estimator)
        covariances = [self.cov_estimator_.fit(x).covariance_ for x in X]

        # compute prior mean
        if self.prior_mean_type == "geometric":
            self.prior_mean_ = _geometric_mean(covariances,
                                               max_iter=30,
                                               tol=1e-7)
        else:
            # "empirical": plain arithmetic mean of the covariances
            self.prior_mean_ = np.mean(covariances, axis=0)

        # Whitening matrix and its inverse, obtained by applying 1/sqrt and
        # sqrt to the eigenvalues of the prior mean.
        self.prior_whitening_ = _map_eigenvalues(lambda x: 1.0 / np.sqrt(x),
                                                 self.prior_mean_)
        self.prior_whitening_inv_ = _map_eigenvalues(lambda x: np.sqrt(x),
                                                     self.prior_mean_)

        # compute the population prior dispersion: whiten each covariance
        # by the prior mean, map its eigenvalues through the logarithm and
        # vectorize the result
        connectivities = [
            _map_eigenvalues(
                np.log,
                self.prior_whitening_.dot(cov).dot(self.prior_whitening_))
            for cov in covariances
        ]
        connectivities = np.array(connectivities)
        connectivities = sym_matrix_to_vec(connectivities)
        # mean of the outer products of the vectorized connectivities
        self.prior_cov_ = np.mean(
            [
                np.expand_dims(c, 1).dot(np.expand_dims(c, 0))
                for c in connectivities
            ],
            axis=0,
        )
        # approximate the population prior dispersion
        self.prior_cov_approx_ = regularized_eigenvalue_decomposition(
            self.prior_cov_, explained_variance_threshold=0.7)
        return self
def test_geometric_mean_diagonal():
    """Geometric mean of diagonal matrices is their entrywise geometric mean.

    Each matrix is the identity with one large and one small diagonal entry;
    the expected result is the elementwise product raised to 1 / n_matrices.
    """
    n_matrices = 20
    n_features = 5
    diags = []
    for k in range(n_matrices):
        large_idx = k % n_features
        small_idx = (n_features - 1) // (k + 1)
        mat = np.eye(n_features)
        # Assignment order matters: when both indices coincide, the small
        # value overwrites the large one.
        mat[large_idx, large_idx] = 1e4 + k
        mat[small_idx, small_idx] = (k + 1) * 1e-4
        diags.append(mat)
    exponent = 1 / float(len(diags))
    expected = np.prod(np.array(diags), axis=0) ** exponent
    assert_array_almost_equal(_geometric_mean(diags), expected)
def test_geometric_mean_couple():
    """Compare the geometric mean of two matrices with a direct formula.

    The expected value is built as
    spd2^(1/2) . f(spd2^(-1/2) . spd1 . spd2^(-1/2)) . spd2^(1/2),
    where f applies ``np.sqrt`` to the eigenvalues via ``_map_eigenvalues``.
    """
    n_features = 7
    ones = np.ones((n_features, n_features))
    spd1 = ones.dot(ones) + n_features * np.eye(n_features)
    lower = np.tril(np.ones((n_features, n_features)))
    spd2 = lower.dot(lower.T)

    # Square root and inverse square root of spd2 from its eigendecomposition
    vals_spd2, vecs_spd2 = np.linalg.eigh(spd2)
    spd2_sqrt = _form_symmetric(np.sqrt, vals_spd2, vecs_spd2)
    spd2_inv_sqrt = _form_symmetric(np.sqrt, 1. / vals_spd2, vecs_spd2)

    whitened_spd1 = spd2_inv_sqrt.dot(spd1).dot(spd2_inv_sqrt)
    expected = spd2_sqrt.dot(
        _map_eigenvalues(np.sqrt, whitened_spd1)).dot(spd2_sqrt)
    assert_array_almost_equal(_geometric_mean([spd1, spd2]), expected)
def test_geometric_mean_diagonal():
    """Check ``_geometric_mean`` on diagonal matrices.

    For identity matrices perturbed on one or two diagonal entries, the
    reference value is the elementwise product to the power 1 / n_matrices.
    """
    n_matrices = 20
    n_features = 5
    diags = []
    for k in range(n_matrices):
        current = np.eye(n_features)
        big = k % n_features
        current[big, big] = 1e4 + k
        # Written second on purpose: it wins when it targets the same entry.
        tiny = (n_features - 1) // (k + 1)
        current[tiny, tiny] = (k + 1) * 1e-4
        diags.append(current)
    stacked = np.array(diags)
    reference = np.prod(stacked, axis=0) ** (1 / float(len(diags)))
    assert_array_almost_equal(_geometric_mean(diags), reference)
def test_geometric_mean_couple():
    """Check ``_geometric_mean`` on a pair of matrices against a direct formula.

    The reference is spd2_sqrt . f(spd2_inv_sqrt . spd1 . spd2_inv_sqrt) .
    spd2_sqrt, with f mapping eigenvalues through ``np.sqrt``.
    """
    n_features = 7
    shape = (n_features, n_features)

    all_ones = np.ones(shape)
    spd1 = all_ones.dot(all_ones) + n_features * np.eye(n_features)

    triangular = np.tril(np.ones(shape))
    spd2 = triangular.dot(triangular.T)

    vals_spd2, vecs_spd2 = np.linalg.eigh(spd2)
    spd2_sqrt = _form_symmetric(np.sqrt, vals_spd2, vecs_spd2)
    spd2_inv_sqrt = _form_symmetric(np.sqrt, 1. / vals_spd2, vecs_spd2)

    inner = _map_eigenvalues(
        np.sqrt, spd2_inv_sqrt.dot(spd1).dot(spd2_inv_sqrt))
    reference = spd2_sqrt.dot(inner).dot(spd2_sqrt)
    assert_array_almost_equal(_geometric_mean([spd1, spd2]), reference)
def test_geometric_mean_geodesic():
    """Check ``_geometric_mean`` on matrices generated along one curve.

    Matrices are non_singular . g(t * sym) . non_singular.T for integer
    times t, where g maps eigenvalues through ``np.exp``. The expected mean
    is the same expression evaluated at the mean time.
    """
    n_matrices = 10
    n_features = 6

    # Rank-one symmetric direction built from a normalized ramp vector
    ramp = np.arange(n_features)
    unit = ramp / np.linalg.norm(ramp)
    sym = unit * unit[:, np.newaxis]

    times = np.arange(n_matrices)
    non_singular = np.eye(n_features)
    non_singular[1:3, 1:3] = np.array([[-1, -.5], [-.5, -1]])

    spds = [
        non_singular.dot(_map_eigenvalues(np.exp, t * sym)).dot(
            non_singular.T)
        for t in times
    ]
    mean_time = times.mean()
    gmean = non_singular.dot(_map_eigenvalues(np.exp, mean_time * sym)).dot(
        non_singular.T)
    assert_array_almost_equal(_geometric_mean(spds), gmean)
def test_geometric_mean_geodesic():
    """Geometric mean of points on one curve is the point at the mean time.

    Each input is non_singular . g(t * sym) . non_singular.T, with g applying
    ``np.exp`` to eigenvalues through ``_map_eigenvalues``.
    """
    n_matrices = 10
    n_features = 6

    indices = np.arange(n_features)
    direction = indices / np.linalg.norm(indices)
    sym = direction * direction[:, np.newaxis]

    non_singular = np.eye(n_features)
    non_singular[1:3, 1:3] = np.array([[-1, -.5], [-.5, -1]])

    def _point_at(t):
        # Congruence of the eigenvalue-exponentiated matrix by non_singular
        return non_singular.dot(_map_eigenvalues(np.exp, t * sym)).dot(
            non_singular.T)

    times = np.arange(n_matrices)
    spds = []
    for t in times:
        spds.append(_point_at(t))
    expected = _point_at(times.mean())
    assert_array_almost_equal(_geometric_mean(spds), expected)
def map_tangent(data, diag=False):
    """Transform matrices to the tangent space at their geometric mean.

    The geometric mean of ``data`` is computed, a whitening matrix is derived
    from it by applying ``1 / sqrt`` to its eigenvalues, and each whitened
    input has its eigenvalues mapped through ``np.log`` before vectorization.

    Parameters
    ----------
    data: list of numpy.ndarray of shape(n_features, n_features)
        Matrices to transform.
        NOTE(review): the eigenvalue logarithm suggests these must be
        strictly positive definite -- confirm.
    diag: bool
        Forwarded to ``sym_matrix_to_vec`` as ``discard_diagonal``, i.e.
        whether to discard the diagonal elements before vectorizing.
        Default is False.

    Returns
    -------
    tangent: numpy.ndarray
        Result of ``sym_matrix_to_vec`` on the stacked tangent matrices.
        Presumably one vector per input matrix -- verify against that helper.
    """
    center = _geometric_mean(data, max_iter=30, tol=1e-7)

    def _inverse_sqrt(eigenvalues):
        return 1. / np.sqrt(eigenvalues)

    whitener = _map_eigenvalues(_inverse_sqrt, center)

    projected = []
    for matrix in data:
        whitened = whitener.dot(matrix).dot(whitener)
        projected.append(_map_eigenvalues(np.log, whitened))

    return sym_matrix_to_vec(np.array(projected), discard_diagonal=diag)
def test_geometric_mean_properties():
    """Check generic, invariance and convergence properties of
    ``_geometric_mean``.
    """
    n_matrices = 40
    n_features = 15
    # NOTE(review): every call passes random_state=0; if random_spd reseeds
    # from that integer on each call, all matrices are identical -- confirm
    # this is intended.
    spds = [random_spd(n_features, eig_min=1., cond=10., random_state=0)
            for _ in range(n_matrices)]
    # Deep copy: a shallow copy would share the very same arrays, so the
    # "inputs unchanged" check below could never detect in-place changes.
    input_spds = copy.deepcopy(spds)
    gmean = _geometric_mean(spds)

    # Generic
    assert isinstance(spds, list)
    for spd, input_spd in zip(spds, input_spds):
        assert_array_equal(spd, input_spd)
    assert is_spd(gmean, decimal=7)

    # Invariance under reordering
    spds.reverse()
    spds.insert(0, spds[1])
    spds.pop(2)
    assert_array_almost_equal(_geometric_mean(spds), gmean)

    # Invariance under congruent transformation
    non_singular = random_non_singular(n_features, random_state=0)
    spds_cong = [non_singular.dot(spd).dot(non_singular.T) for spd in spds]
    assert_array_almost_equal(_geometric_mean(spds_cong),
                              non_singular.dot(gmean).dot(non_singular.T))

    # Invariance under inversion
    spds_inv = [linalg.inv(spd) for spd in spds]
    init = linalg.inv(np.mean(spds, axis=0))
    assert_array_almost_equal(_geometric_mean(spds_inv, init=init),
                              linalg.inv(gmean))

    # Gradient norm is decreasing
    grad_norm = grad_geometric_mean(spds, tol=1e-20)
    difference = np.diff(grad_norm)
    assert np.amax(difference) <= 0.

    # Check that a warning is raised when the gradient norm at the last
    # step is still greater than the tolerance
    max_iter = 1
    tol = 1e-20
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        _geometric_mean(spds, max_iter=max_iter, tol=tol)
        assert len(w) == 1
    grad_norm = grad_geometric_mean(spds, max_iter=max_iter, tol=tol)
    assert len(grad_norm) == max_iter
    assert grad_norm[-1] > tol

    # Evaluate convergence. A warning is printed if tolerance is not reached
    for p in [.5, 1.]:  # proportion of badly conditioned matrices
        n_bad = int(p * n_matrices)
        spds = [random_spd(n_features, eig_min=1e-2, cond=1e6,
                           random_state=0)
                for _ in range(n_bad)]
        spds += [random_spd(n_features, eig_min=1., cond=10.,
                            random_state=0)
                 for _ in range(n_bad, n_matrices)]
        max_iter = 30 if p < 1 else 60
        # Only checks that the call completes; the result is not inspected.
        _geometric_mean(spds, max_iter=max_iter, tol=1e-5)
def test_geometric_mean_properties():
    """Check generic, invariance and convergence properties of
    ``_geometric_mean``.
    """
    n_matrices = 40
    n_features = 15
    # NOTE(review): every call passes random_state=0; if random_spd reseeds
    # from that integer on each call, all matrices are identical -- confirm
    # this is intended.
    spds = [random_spd(n_features, eig_min=1., cond=10., random_state=0)
            for _ in range(n_matrices)]
    # Deep copy: a shallow copy would share the very same arrays, so the
    # "inputs unchanged" check below could never detect in-place changes.
    input_spds = copy.deepcopy(spds)
    gmean = _geometric_mean(spds)

    # Generic
    assert isinstance(spds, list)
    for spd, input_spd in zip(spds, input_spds):
        assert_array_equal(spd, input_spd)
    assert is_spd(gmean, decimal=7)

    # Invariance under reordering
    spds.reverse()
    spds.insert(0, spds[1])
    spds.pop(2)
    assert_array_almost_equal(_geometric_mean(spds), gmean)

    # Invariance under congruent transformation
    non_singular = random_non_singular(n_features, random_state=0)
    spds_cong = [non_singular.dot(spd).dot(non_singular.T) for spd in spds]
    assert_array_almost_equal(_geometric_mean(spds_cong),
                              non_singular.dot(gmean).dot(non_singular.T))

    # Invariance under inversion
    spds_inv = [linalg.inv(spd) for spd in spds]
    init = linalg.inv(np.mean(spds, axis=0))
    assert_array_almost_equal(_geometric_mean(spds_inv, init=init),
                              linalg.inv(gmean))

    # Gradient norm is decreasing
    grad_norm = grad_geometric_mean(spds, tol=1e-20)
    difference = np.diff(grad_norm)
    assert np.amax(difference) <= 0.

    # Check that a warning is raised when the gradient norm at the last
    # step is still greater than the tolerance
    max_iter = 1
    tol = 1e-20
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        _geometric_mean(spds, max_iter=max_iter, tol=tol)
        assert len(w) == 1
    grad_norm = grad_geometric_mean(spds, max_iter=max_iter, tol=tol)
    assert len(grad_norm) == max_iter
    assert grad_norm[-1] > tol

    # Evaluate convergence. A warning is printed if tolerance is not reached
    for p in [.5, 1.]:  # proportion of badly conditioned matrices
        n_bad = int(p * n_matrices)
        spds = [random_spd(n_features, eig_min=1e-2, cond=1e6,
                           random_state=0)
                for _ in range(n_bad)]
        spds += [random_spd(n_features, eig_min=1., cond=10.,
                            random_state=0)
                 for _ in range(n_bad, n_matrices)]
        max_iter = 30 if p < 1 else 60
        # Only checks that the call completes; the result is not inspected.
        _geometric_mean(spds, max_iter=max_iter, tol=1e-5)