Ejemplo n.º 1
0
def get_data(n1=500, n2=300):
    """Generate a reproducible two-cluster 2-D dataset with boolean labels.

    The first cluster is drawn from a Gaussian with mean (1, 2) and covariance
    diag(1, 2) and is then mapped through a fixed 2x2 linear transform.  The
    second cluster is drawn from a Gaussian with mean (-1, 10) and the same
    covariance, and is left untransformed.

    Note:
        NumPy's global random generator is reseeded with 0 on every call.

    Args:
        n1: number of samples in the first cluster (labelled ``True``).
        n2: number of samples in the second cluster (labelled ``False``).

    Returns:
        A tuple ``(x, y)`` where ``x`` stacks both clusters (first cluster
        first) and ``y`` is a boolean array of length ``n1 + n2``.
    """
    np.random.seed(0)
    cov = np.diag((1, 2))
    x1 = multivariate_normal(mean=(1, 2), cov=cov, size=n1)
    # Shear/mix the first cluster so that its two coordinates are correlated.
    mixing = np.array(((1, 2), (1, 3)))
    x1 = x1.dot(mixing)
    x2 = multivariate_normal(mean=(-1, 10), cov=cov, size=n2)
    x = np.vstack((x1, x2))
    # Labels are derived from the cluster sizes so they can never drift apart.
    y = np.arange(n1 + n2) < n1
    return x, y
Ejemplo n.º 2
0
def build_data_linear(true_model_param: torch.FloatTensor,
                      n_samples=NB_OF_POINTS_BY_DEVICE,
                      n_dimensions=DIM,
                      n_devices: int = 1,
                      with_seed: bool = False,
                      without_noise=False,
                      features_corr=0.6,
                      labels_std=0.4):
    """Build data for least-square regression.

    Args:
        true_model_param: the true parameters of the model.
        n_samples: number of sample by devices.
        n_dimensions: dimension of the problem.
        n_devices: number of devices.
        with_seed: true if we want to initialize the pseudo-random number generator.
        without_noise: true to return noiseless labels.
        features_corr: correlation coefficient used to generate data points.
        labels_std: standard deviation coefficient of the noises added on labels.

    Returns:
        if more than one device, a list of pytorch tensor, otherwise a single tensor.
    """
    n_points = floor(n_samples)

    # The covariance matrix only depends on the arguments: build it once.
    cov = toeplitz(features_corr**np.arange(0, n_dimensions))

    X, Y = [], []
    for _ in range(n_devices):

        # NOTE(review): with with_seed=True both generators are reseeded for
        # every device, so all devices receive identical draws - confirm this
        # is intended.
        if with_seed:
            np.random.seed(0)
        x = torch.from_numpy(
            multivariate_normal(
                np.zeros(n_dimensions), cov,
                size=n_points).astype(dtype=np.float64))

        # Simulation of the labels
        y = x.mv(true_model_param) + BIAS

        # We add or not a noise
        if not without_noise:
            # torch.manual_seed reseeds the global generator and returns it.
            generator = torch.manual_seed(0) if with_seed else None
            # One independent noise value per sample.  The previous code drew
            # an (n, 1) tensor and kept only its first row, which added the
            # same single value to every label.
            y += torch.normal(0,
                              labels_std,
                              size=(n_points,),
                              generator=generator,
                              dtype=torch.float64)

        X.append(x)
        Y.append(y)
    if n_devices == 1:
        return X[0], Y[0]
    return X, Y
Ejemplo n.º 3
0
def build_data_logistic(true_model_param: torch.FloatTensor,
                        n_samples=NB_OF_POINTS_BY_DEVICE,
                        n_dimensions=DIM,
                        n_devices: int = 1,
                        with_seed: bool = False,
                        features_corr=0.6,
                        labels_std=0.4):
    """Build data for logistic regression.

    Args:
        true_model_param: the true parameters of the model.
        n_samples: number of sample by devices.
        n_dimensions: dimension of the problem.
        n_devices: number of devices.
        with_seed: true if we want to initialize the pseudo-random number generator.
        features_corr: correlation coefficient used to generate data points.
        labels_std: unused by this function; kept for signature compatibility.

    Returns:
        if more than one device, a list of pytorch tensor, otherwise a single tensor.
        Labels take their values in {-1, 1}.
    """
    n_points = floor(n_samples)

    # The covariance matrix only depends on the arguments: build it once.
    cov = toeplitz(features_corr**np.arange(0, n_dimensions))

    X, Y = [], []
    model_copy = deepcopy(true_model_param)
    for i in range(n_devices):
        even_device = i % 2 == 0

        # We use two different models to simulate non iid data: even devices
        # flip the sign of one coordinate of the model, odd devices use the
        # true model.
        if even_device:
            model_copy[(i + 1) % n_dimensions] *= -1
        else:
            model_copy = deepcopy(true_model_param)

        # Seed only when requested.  The condition used to be inverted, which
        # contradicted the documented meaning of `with_seed`.
        if with_seed:
            np.random.seed(0)

        # Even devices also flip the sign of one feature coordinate.
        sign = np.ones(n_dimensions)
        if even_device:
            sign[i % n_dimensions] = -1

        x = torch.from_numpy(sign * multivariate_normal(
            np.zeros(n_dimensions), cov,
            size=n_points).astype(dtype=np.float64))

        # Simulation of the labels
        # NB : Logistic synthetic dataset is used to show how Artemis is used in non-i.i.d. settings.
        # This is why, we don't introduce a bias here.
        y = torch.bernoulli(torch.sigmoid(x.mv(model_copy)))
        y[y == 0] = -1
        X.append(x)
        Y.append(y)

    if n_devices == 1:
        return X[0], Y[0]
    return X, Y
Ejemplo n.º 4
0
 def _get_likelihood(self, x, noise):
     mean = self.likelihoods.get(x[0], self.class_priors)
     cov = self.likelihoods_cov.get(x[0], np.diag(np.zeros((self.nclass,))))
     if noise:
         if isinstance(noise, float):
             cov = np.diag(np.ones((self.nclass,)) * noise)
         lh = np.abs(multivariate_normal(mean, cov))
         return lh / lh.sum()
     else:
         return mean
Ejemplo n.º 5
0
def simulate_log_reg(n_samples, coef, intercept=None):
    """Simulate a logistic-regression dataset with Toeplitz-correlated features.

    Args:
        n_samples: number of rows to draw.
        coef: 1-D coefficient vector; its length sets the number of features.
        intercept: optional offset added to the linear scores.

    Returns:
        ``(X, y)`` with ``X`` the Gaussian feature matrix and ``y`` the
        float64 labels in {-1, +1}.
    """
    dim = coef.shape[0]
    # Covariance whose first column is 0.5 ** k for k = 0 .. dim - 1.
    sigma = toeplitz(0.5 ** np.arange(0, dim))
    X = multivariate_normal(np.zeros(dim), sigma, size=n_samples)

    scores = X.dot(coef)
    if intercept is not None:
        scores += intercept

    # Bernoulli draws with success probability sigmoid(score).
    draws = np.random.binomial(1, sigmoid(scores), size=n_samples)
    # Map the {0, 1} draws onto {-1, +1} float labels.
    y = np.where(draws == 1, 1.0, -1.0)
    return X, y
 def test_multivariate_normal_size_types(self):
     # Test for multivariate_normal issue with 'size' argument.
     # Check that the multivariate_normal size argument can be a
     # numpy integer.
     random.multivariate_normal([0], [[0]], size=1)
     random.multivariate_normal([0], [[0]], size=np.int_(1))
     random.multivariate_normal([0], [[0]], size=np.int64(1))
Ejemplo n.º 7
0
def simulate(n_samples, w0, b0=None):
    """Simulate a binary-classification dataset with rescaled Gaussian features.

    Args:
        n_samples: number of rows to draw.
        w0: 1-D weight vector; its length sets the number of features.
        b0: optional offset added to the linear scores.

    Returns:
        ``(X, y)`` where ``X`` is the output of
        ``StandardScaler().fit_transform`` applied to the Gaussian draws and
        ``y`` holds float64 labels in {-1, +1}.
    """
    dim = w0.shape[0]
    sigma = toeplitz(0.5 ** np.arange(0, dim))
    raw = multivariate_normal(np.zeros(dim), sigma, size=n_samples)
    X = StandardScaler().fit_transform(raw)

    logits = X.dot(w0)
    if b0 is not None:
        logits += b0

    # Bernoulli draws with success probability sigmoid(logit).
    successes = np.random.binomial(1, sigmoid(logits), size=n_samples)
    # Map the {0, 1} draws onto {-1, +1} float labels.
    y = 2 * successes.astype("float64") - 1
    return X, y
def multivariate_normal(mean, cov, shape=None):
    """multivariate_normal(mean, cov) or multivariate_normal(mean, cov, [m, n, ...])
    returns an array containing multivariate normally distributed random numbers
    with specified mean and covariance.

    mean must be a 1 dimensional array. cov must be a square two dimensional
    array with the same number of rows and columns as mean has elements.

    The first form returns a single 1-D array containing a multivariate
    normal. It is selected when shape is omitted, None, or an empty list.

    The second form returns an array of shape (m, n, ..., cov.shape[0]).
    In this case, output[i,j,...,:] is a 1-D array containing a multivariate
    normal."""
    # An empty list is still accepted for backward compatibility with the old
    # ``shape=[]`` default.  It is detected with isinstance/len rather than
    # ``== []`` so that array-like shapes are never compared element-wise.
    if isinstance(shape, list) and not shape:
        shape = None
    return mt.multivariate_normal(mean, cov, shape)
Ejemplo n.º 9
0
def multivariate_normal(mean, cov, shape=None):
    """multivariate_normal(mean, cov) or multivariate_normal(mean, cov, [m, n, ...])
    returns an array containing multivariate normally distributed random numbers
    with specified mean and covariance.

    mean must be a 1 dimensional array. cov must be a square two dimensional
    array with the same number of rows and columns as mean has elements.

    The first form returns a single 1-D array containing a multivariate
    normal. It is selected when shape is omitted, None, or an empty list.

    The second form returns an array of shape (m, n, ..., cov.shape[0]).
    In this case, output[i,j,...,:] is a 1-D array containing a multivariate
    normal."""
    # An empty list is still accepted for backward compatibility with the old
    # ``shape=[]`` default.  It is detected with isinstance/len rather than
    # ``== []`` so that array-like shapes are never compared element-wise.
    if isinstance(shape, list) and not shape:
        shape = None
    return mt.multivariate_normal(mean, cov, shape)
Ejemplo n.º 10
0
def generateData(num,mean=100,var=0):
    """Generate ``num`` noisy samples of a sine curve, each with an extra draw.

    :param num: number of samples to generate
    :param mean: mean passed to ``multivariate_normal`` for the extra draw
    :param var: variance passed to ``multivariate_normal`` for the extra draw
    :return: list of ``(x, t, r)`` tuples where ``x`` comes from
        ``np.random.rand``, ``t = sin(2*pi*x) + eps/100`` with ``eps`` from
        ``np.random.randn``, and ``r`` is the value returned by
        ``multivariate_normal([mean], [[var]])``
    """
    xs = np.random.rand(num)
    eps = np.random.randn(num)

    # Sine target plus a small Gaussian perturbation.
    targets = [sin(2 * pi * x) + e / 100 for x, e in zip(xs, eps)]

    # One independent draw per sample.
    draws = [multivariate_normal([mean], [[var]]) for _ in range(num)]

    return list(zip(xs, targets, draws))
Ejemplo n.º 11
0
def generateThiDimensionalData(num, featurenum=3):
    """Draw ``num`` independent samples from a fixed 3-D Gaussian.

    The distribution has mean (1, 0, 0) and a diagonal covariance with
    variances 100, 10 and 0.1 along the three axes.

    :param num: amount of data (number of samples) to generate
    :param featurenum: currently unused; the dimension is fixed at 3
    :return: list of ``num`` samples, one ``multivariate_normal`` draw each
    """
    mean = [1, 0, 0]
    # The last row used to be [0, 0.1, 0], which made the matrix asymmetric
    # and left no variance on the third axis.
    cov = [[100, 0, 0], [0, 10, 0], [0, 0, 0.1]]
    return [multivariate_normal(mean, cov) for _ in range(num)]
Ejemplo n.º 12
0
from linlearn.plot import plot_history

from time import sleep

# Print arrays with two decimals.
np.set_printoptions(precision=2)

# Problem size and model configuration.
n_samples = 1_000
epoch_size = n_samples
n_features = 50
fit_intercept = True

# Ground-truth parameters of the simulated linear model.
coef0 = np.random.randn(n_features)
intercept0 = -2.0

# Gaussian features with a Toeplitz covariance built from 0.5 ** k.
cov = toeplitz(0.5 ** np.arange(0, n_features))
X = multivariate_normal(np.zeros(n_features), cov, size=n_samples)

# Linear response with small Gaussian noise.
y = X.dot(coef0) + 0.1 * np.random.randn(n_samples)

# Largest squared row norm of X.
lip_max = (X ** 2).sum(axis=1).max()

# With an intercept: shift the labels, add one weight and add one to lip_max.
if fit_intercept:
    y += intercept0
    w_start = np.zeros(n_features + 1)
    lip_max += 1
else:
    w_start = np.zeros(n_features)
Ejemplo n.º 13
0
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules.softmax import SoftmaxLayer
from pybrain.supervised.trainers.backprop import BackpropTrainer
from builtins import range
from pybrain.utilities import percentError
from pylab import figure, plot, ioff, clf, hold, contourf, ion, draw, show
from pybraincorrected.datasets.classification import ClassificationDataSet


# One mean and one diagonal covariance per class.
means = [(-1, 0), (2, 4), (3, 1)]
cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
alldata = ClassificationDataSet(2, 1, nb_classes=3)

# 400 rounds, each adding one Gaussian sample per class.
for n in range(400):
    for klass, (class_mean, class_cov) in enumerate(zip(means, cov)):
        input_ = multivariate_normal(class_mean, class_cov)
        alldata.addSample(input_, [klass])

# Split with proportion 0.25, then convert both parts' targets.
testdata, traindata = alldata.splitWithProportion(0.25)
for subset in (testdata, traindata):
    subset._convertToOneOfMany()

print("data dimensions ", len(traindata), traindata.indim, testdata.indim)

# NOTE(review): the labels say "test data" but the values printed come from
# traindata - confirm which one is intended.
for caption, field in (("sample test data input", 'input'),
                       ("sample test data target", 'target'),
                       ("sample test data class", "class")):
    print(caption, traindata[field][0])

#for key in traindata.data:
#    print(key)