Esempio n. 1
0
def features_normal_cov_toeplitz(n_samples: int = 200,
                                 n_features: int = 30,
                                 cov_corr: float = 0.5):
    """Draw Gaussian features whose covariance is a Toeplitz matrix.

    Every row of the output is one realization of a centered Gaussian
    vector. The covariance between features ``i`` and ``j`` equals
    ``cov_corr ** |i - j|``. Samples are drawn from NumPy's global
    random state (``np.random``).

    Parameters
    ----------
    n_samples : `int`, default=200
        Number of rows (samples) to draw

    n_features : `int`, default=30
        Number of columns (features) of each sample

    cov_corr : `float`, default=0.5
        Base of the geometric decay used to fill the Toeplitz covariance

    Returns
    -------
    output : `numpy.ndarray`, shape=(n_samples, n_features)
        The simulated features matrix

    """
    # First column of the Toeplitz matrix: 1, cov_corr, cov_corr^2, ...
    lags = np.arange(n_features)
    first_column = cov_corr ** lags
    covariance = toeplitz(first_column)
    mean = np.zeros(n_features)
    return np.random.multivariate_normal(mean, covariance, n_samples)
Esempio n. 2
0
def simu_linreg(x, n, std=1., corr=0.5):
    """Simulate data for a least-squares problem.

    Features are drawn from a centered Gaussian whose covariance is the
    Toeplitz matrix with entries ``corr ** |i - j|``; targets are the
    linear model ``A @ x`` plus Gaussian noise scaled by ``std``.

    Parameters
    ----------
    x : ndarray, shape (d,)
        Coefficients of the linear model
    n : int
        Number of samples to draw
    std : float, default=1.
        Standard deviation of the additive noise
    corr : float, default=0.5
        Correlation parameter of the features covariance

    Returns
    -------
    A : ndarray, shape (n, d)
        The design matrix.
    b : ndarray, shape (n,)
        The targets.
    """
    n_coefs = x.shape[0]
    covariance = toeplitz(corr ** np.arange(n_coefs))
    # Features are drawn before the noise; the order fixes the random stream.
    features = multivariate_normal(np.zeros(n_coefs), covariance, size=n)
    targets = features.dot(x) + std * randn(n)
    return features, targets
Esempio n. 3
0
def simu_linreg(coefs=None,
                n_samples=1000,
                n_features=1000,
                corr=0.5,
                random_state=42):
    """Simulate features and targets from a linear regression model.

    Parameters
    ----------
    coefs : `numpy.array`, shape (n_features,), default=None
        Coefficients of the model. When ``None``, they are drawn as
        ``n_features`` standard normal values.

    n_samples : `int`, default=1000
        Number of samples to simulate

    n_features : `int`, default=1000
        Number of features to simulate

    corr : `float`, default=0.5
        Correlation between features i and j is corr^|i - j|

    random_state : default=42
        Passed to ``check_random_state`` to obtain the random generator
        used for every draw in this function

    Returns
    -------
    X : `numpy.ndarray`, shape (n_samples, n_features)
        Simulated features matrix. Its rows are samples of a centered
        Gaussian vector whose covariance is the Toeplitz matrix above

    y : `numpy.array`, shape (n_samples,)
        Simulated targets: ``X.dot(coefs)`` plus standard normal noise
    """
    generator = check_random_state(random_state)
    # Coefficients (if needed) are drawn first, then features, then noise.
    weights = generator.randn(n_features) if coefs is None else coefs
    covariance = toeplitz(corr ** np.arange(n_features))
    features = generator.multivariate_normal(
        np.zeros(n_features), covariance, size=n_samples)
    noise = generator.randn(n_samples)
    targets = features.dot(weights) + noise
    return features, targets
Esempio n. 4
0
def build_data_linear(true_model_param: torch.FloatTensor,
                      n_samples=NB_OF_POINTS_BY_DEVICE,
                      n_dimensions=DIM,
                      n_devices: int = 1,
                      with_seed: bool = False,
                      without_noise=False,
                      features_corr=0.6,
                      labels_std=0.4):
    """Build data for least-square regression.

    For every device, features are sampled from a centered Gaussian with
    Toeplitz covariance ``features_corr ** |i - j|`` and labels are
    ``x @ true_model_param + BIAS``, optionally perturbed by independent
    Gaussian noise of standard deviation ``labels_std``.

    Args:
        true_model_param: the true parameters of the model.
            NOTE(review): features are float64, so this presumably has to be
            a float64 tensor for ``mv`` to accept it - confirm with callers.
        n_samples: number of sample by devices (floored to an integer).
        n_dimensions: dimension of the problem.
        n_devices: number of devices.
        with_seed: true if we want to initialize the pseudo-random number
            generators. Both NumPy and torch are re-seeded with 0 for every
            device, so all devices then receive the same draws.
        without_noise: true to return noiseless labels.
        features_corr: correlation coefficient used to generate data points.
        labels_std: standard deviation coefficient of the noises added on labels.

    Returns:
        A pair ``(X, Y)``. With a single device, ``X`` and ``Y`` are tensors
        (features and labels); with more than one device they are lists
        holding one tensor per device.
    """
    n_points = floor(n_samples)

    # The covariance matrix is the same for every device: build it once.
    cov = toeplitz(features_corr**np.arange(0, n_dimensions))

    X, Y = [], []
    for _ in range(n_devices):

        if with_seed:
            np.random.seed(0)
        x = torch.from_numpy(
            multivariate_normal(
                np.zeros(n_dimensions), cov,
                size=n_points).astype(dtype=np.float64))

        # Simulation of the labels
        y = x.mv(true_model_param) + BIAS

        # We add or not a noise
        if not without_noise:
            # torch.manual_seed returns the (re-seeded) default generator.
            generator = torch.manual_seed(0) if with_seed else None
            # One independent draw per label. The previous code drew a
            # (n, 1) tensor and kept only row 0, so a single noise value was
            # broadcast onto every label.
            y += torch.normal(0,
                              labels_std,
                              size=(n_points,),
                              generator=generator,
                              dtype=torch.float64)

        X.append(x)
        Y.append(y)
    if n_devices == 1:
        return X[0], Y[0]
    return X, Y
Esempio n. 5
0
def simu_linreg(x, n_samples, std=1., corr=0.5):
    """Simulate data for the least-squares problem.

    Draws ``n_samples`` centered Gaussian feature vectors with Toeplitz
    covariance ``corr ** |i - j|`` and returns them together with the
    targets ``A @ x`` plus Gaussian noise scaled by ``std``.
    """
    n_coefs = x.shape[0]
    covariance = toeplitz(corr ** np.arange(n_coefs))
    # Features first, noise second: the order fixes the random stream.
    design = multivariate_normal(np.zeros(n_coefs), covariance,
                                 size=n_samples)
    targets = design.dot(x) + std * randn(n_samples)
    return design, targets
Esempio n. 6
0
def build_data_logistic(true_model_param: torch.FloatTensor,
                        n_samples=NB_OF_POINTS_BY_DEVICE,
                        n_dimensions=DIM,
                        n_devices: int = 1,
                        with_seed: bool = False,
                        features_corr=0.6,
                        labels_std=0.4):
    """Build data for logistic regression.

    Devices with an even index use a modified model (one coordinate of the
    true parameters has its sign flipped) and one feature column with its
    sign flipped; devices with an odd index use the true model and
    unmodified features. This yields non-i.i.d. data across devices.

    Args:
        true_model_param: the true parameters of the model (left unmodified;
            each device works on a copy).
        n_samples: number of sample by devices (floored to an integer).
        n_dimensions: dimension of the problem.
        n_devices: number of devices.
        with_seed: true if we want to initialize the pseudo-random number
            generator. Only NumPy's generator (used for the features) is
            re-seeded; the Bernoulli draw of the labels is not.
        features_corr: correlation coefficient used to generate data points.
        labels_std: kept for signature compatibility; not used here.

    Returns:
        A pair ``(X, Y)``. With a single device, ``X`` and ``Y`` are tensors
        (features and labels in {-1, 1}); with more than one device they are
        lists holding one tensor per device.
    """
    n_points = floor(n_samples)

    # The covariance matrix is the same for every device: build it once.
    cov = toeplitz(features_corr**np.arange(0, n_dimensions))

    X, Y = [], []
    for i in range(n_devices):
        flipped_device = i % 2 == 0

        # We use two different model to simulate non iid data: every
        # even-indexed device gets a fresh copy with one sign flipped.
        device_model = deepcopy(true_model_param)
        if flipped_device:
            device_model[(i + 1) % n_dimensions] *= -1

        # Seed only when requested. The previous condition was inverted
        # (`if not with_seed`), contradicting the documented contract and
        # build_data_linear.
        if with_seed:
            np.random.seed(0)

        sign = np.ones(n_dimensions)
        if flipped_device:
            sign[i % n_dimensions] = -1

        x = torch.from_numpy(sign * multivariate_normal(
            np.zeros(n_dimensions), cov,
            size=n_points).astype(dtype=np.float64))

        # Simulation of the labels
        # NB: the synthetic logistic dataset is used to show how Artemis is
        # used in non-i.i.d. settings. This is why we don't introduce a bias
        # here.
        # `mv` needs a 1-D vector, for which `.T` is a deprecated no-op, so
        # the model is passed directly.
        y = torch.bernoulli(torch.sigmoid(x.mv(device_model)))
        y[y == 0] = -1
        X.append(x)
        Y.append(y)

    if n_devices == 1:
        return X[0], Y[0]
    return X, Y
Esempio n. 7
0
 def simu_linreg_data(n_samples=5000, n_features=50, interc=-1., p_nnz=0.3):
     """Simulate sparse features and targets for a linear regression.

     NumPy's global random state is re-seeded with 123 on every call, so
     the output is deterministic for given arguments. Gaussian features
     with Toeplitz covariance ``0.5 ** |i - j|`` are masked entry-wise by
     Bernoulli(``p_nnz``) draws, and rows whose sum is zero are dropped.

     Parameters
     ----------
     n_samples : int, default=5000
         Number of rows drawn before zero-sum rows are removed
     n_features : int, default=50
         Number of features
     interc : float, default=-1.
         Intercept added to the targets when truthy
     p_nnz : float, default=0.3
         Probability that a feature entry is kept (not zeroed)

     Returns
     -------
     X : ndarray, shape (n_kept, n_features)
         Masked features, with ``n_kept <= n_samples``
     y : ndarray, shape (n_kept,)
         ``X.dot(weights)`` plus standard normal noise (plus ``interc``)
     """
     np.random.seed(123)
     # Alternating-sign, exponentially decaying model weights.
     ranks = np.arange(1, n_features + 1)
     alternating_signs = (-1) ** (ranks - 1)
     weights = alternating_signs * np.exp(-ranks / 10.)
     covariance = toeplitz(0.5 ** np.arange(n_features))
     # Draw order (features, mask, noise) fixes the random stream.
     dense = np.random.multivariate_normal(np.zeros(n_features), covariance,
                                           size=n_samples)
     keep_mask = np.random.binomial(1, p_nnz, size=dense.shape)
     masked = dense * keep_mask
     kept = masked[masked.sum(axis=1) != 0]
     noise = np.random.randn(kept.shape[0])
     y = kept.dot(weights) + noise
     if interc:
         y += interc
     return kept, y
Esempio n. 8
0
def features_normal_cov_toeplitz(n_samples=200, n_features=10, rho=0.5):
    """Sample centered Gaussian features with a Toeplitz covariance.

    The covariance between features ``i`` and ``j`` is ``rho ** |i - j|``.
    Samples are drawn from NumPy's global random state (``np.random``).

    Parameters
    ----------
    n_samples : `int`, default=200
        Number of rows (samples) to draw

    n_features : `int`, default=10
        Number of columns (features) of each sample

    rho : `float`, default=0.5
        Base of the geometric decay used to fill the Toeplitz covariance

    Returns
    -------
    output : `np.ndarray`, shape=(n_samples, n_features)
    """
    # First column of the Toeplitz matrix: 1, rho, rho^2, ...
    decay = rho ** np.arange(n_features)
    covariance = toeplitz(decay)
    center = np.zeros(n_features)
    return np.random.multivariate_normal(center, covariance, n_samples)
def test_mm_mixed_norm_bayes():
    """Basic test of the mm_mixed_norm_bayes function.

    Simulates a small problem with two active rows in ``X_true`` and a
    block-correlated design matrix, runs the solver with and without
    ``return_samples=True``, and checks the most frequently active indices
    and the shapes of the returned arrays.
    """
    # First we define the problem size and the location of the active sources.
    n_features = 16
    n_samples = 24
    n_times = 5

    X_true = np.zeros((n_features, n_times))
    # Active sources at indices 5 and 10
    X_true[5, :] = 2.
    X_true[10, :] = 2.

    # Construction of a covariance matrix
    rng = np.random.RandomState(0)
    # Set the correlation of each simulated source
    corr = [0.6, 0.95]
    cov = []
    # One Toeplitz block per correlation value, each covering
    # n_features // len(corr) features.
    for c in corr:
        this_cov = toeplitz(c**np.arange(0, n_features // len(corr)))
        cov.append(this_cov)

    # Assemble the blocks into a single block-diagonal covariance matrix.
    cov = np.array(linalg.block_diag(*cov))

    # Simulation of the design matrix / forward operator
    G = rng.multivariate_normal(np.zeros(n_features), cov, size=n_samples)

    # Simulation of the data with some noise
    M = G.dot(X_true)
    # Noise level is 10% of the standard deviation of the noiseless data.
    M += 0.1 * np.std(M) * rng.randn(n_samples, n_times)
    n_orient = 1

    # Define the regularization parameter and run the solver
    lambda_max = norm_l2inf(np.dot(G.T, M), n_orient)
    lambda_ref = 0.3 * lambda_max
    K = 10
    random_state = 0  # set random seed to make results replicable
    out = mm_mixed_norm_bayes(M,
                              G,
                              lambda_ref,
                              n_orient=n_orient,
                              K=K,
                              random_state=random_state)

    # Without return_samples the output is unpacked as five items.
    Xs, active_sets = out[:2]
    lpp_samples, lppMAP, pobj_l2half = out[2:]

    # Frequency with which each index is active, averaged over axis 0.
    freq_occ = np.mean(active_sets, axis=0)
    # argsort is ascending, so the last two entries are the two most
    # frequently active indices.
    # NOTE(review): index 9 is expected although X_true activates row 10;
    # presumably a consequence of the 0.95 correlation in the second block -
    # confirm.
    assert_equal(np.argsort(freq_occ)[-2:], [9, 5])
    assert len(Xs) == K
    assert lpp_samples.shape == (K, )
    assert pobj_l2half.shape == (K, )
    assert lppMAP.shape == (K, )

    # Same run, additionally requesting the samples.
    out = mm_mixed_norm_bayes(M,
                              G,
                              lambda_ref,
                              n_orient=n_orient,
                              K=K,
                              return_samples=True,
                              random_state=random_state)

    # With return_samples=True two extra items are unpacked from the end.
    Xs, active_sets = out[:2]
    lpp_samples, lppMAP, pobj_l2half = out[2:-2]
    X_samples, gamma_samples = out[-2:]

    freq_occ = np.mean(active_sets, axis=0)
    assert_equal(np.argsort(freq_occ)[-2:], [9, 5])
    assert len(Xs) == K
    assert lpp_samples.shape == (K, )
    assert pobj_l2half.shape == (K, )
    assert lppMAP.shape == (K, )
    assert X_samples.shape == (K, n_features, n_times, 2)
    assert gamma_samples.shape == (K, n_features, 2)
Esempio n. 10
0
# NOTE(review): n_features, n_samples and n_times are used below but are not
# defined in this fragment - presumably set earlier in the script; confirm.
lambda_percent = 20.
K = 1000

X_true = np.zeros((n_features, n_times))
# Active sources at indices 4 and 14
X_true[4, :] = 1.
X_true[14, :] = 1.

###############################################################################
# Construction of a covariance matrix
rng = np.random.RandomState(0)
# Set the correlation of each simulated source
corr = [0.5, 0.95]
cov = []
# One Toeplitz block per correlation value, each covering
# n_features // len(corr) features.
for c in corr:
    this_cov = toeplitz(c**np.arange(0, n_features // len(corr)))
    cov.append(this_cov)

# Assemble the blocks into a single block-diagonal covariance matrix.
cov = np.array(linalg.block_diag(*cov))

plt.matshow(cov)
plt.gca().xaxis.set_ticks_position('bottom')
plt.title('True Covariance')

###############################################################################
# Simulation of the design matrix / forward operator
G = rng.multivariate_normal(np.zeros(n_features), cov, size=n_samples)

# Display the Gram matrix G^T G of the simulated design (it is not divided
# by n_samples).
plt.matshow(G.T.dot(G))
plt.gca().xaxis.set_ticks_position('bottom')
plt.title("Feature covariance")
Esempio n. 11
0
# NOTE(review): n_features is used below but is not defined in this
# fragment - presumably set earlier in the script; confirm.
n_samples = 10
n_times = 1
lambda_percent = 50.
K = 5000

X_true = np.zeros((n_features, n_times))
# Active sources at indices 4 and 14
X_true[4, :] = 1.
X_true[14, :] = 1.

###############################################################################
# Construction of a covariance matrix
rng = np.random.RandomState(0)
# Set the correlation of each simulated source
corr = 0.95
# Toeplitz covariance for half of the features only; the other half is
# obtained below by duplicating the simulated columns.
cov = toeplitz(corr**np.arange(0, n_features // 2))

###############################################################################
# Simulation of the design matrix / forward operator
G = rng.multivariate_normal(np.zeros(len(cov)), cov, size=n_samples)
# Stack G next to a copy of itself: column i and column i + len(cov) are
# identical.
G = np.concatenate((G, G), axis=1)
G /= np.linalg.norm(G, axis=0)[np.newaxis, :]  # normalize columns

# Display the Gram matrix G^T G of the column-normalized design.
plt.matshow(G.T.dot(G))
plt.title("Feature covariance")

###############################################################################
# Simulation of the data with some noise
M = G.dot(X_true)
# Noise level is 20% of the largest absolute value of the noiseless data.
M += 0.2 * np.max(np.abs(M)) * rng.randn(n_samples, n_times)