Example 1
def gaussian_dd(n_samples, n_features=2):  ##{{{
    """
	SBCK.datasets.gaussian_dd
	=========================
	
	Build a test dataset where X0, X1 and Y0 are drawn from multivariate normal distributions.
	
	Parameters
	----------
	n_samples : integer
		Number of samples in X0, X1 and Y0
	n_features : integer
		Dimension (number of features); default is 2
	
	Returns
	-------
	Y0,X0,X1 : tuple
		- Y0 reference dataset in calibration period
		- X0 biased dataset in calibration period
		- X1 biased dataset in projection period
	"""
    X0 = np.random.multivariate_normal(mean=np.zeros(n_features),
                                       cov=skd.make_spd_matrix(n_features),
                                       size=n_samples)
    X1 = np.random.multivariate_normal(mean=np.zeros(n_features) + 5,
                                       cov=skd.make_spd_matrix(n_features),
                                       size=n_samples)
    Y0 = np.random.multivariate_normal(mean=np.zeros(n_features) - 2,
                                       cov=skd.make_spd_matrix(n_features),
                                       size=n_samples)
    return Y0, X0, X1
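A minimal usage sketch for gaussian_dd (an addition, not part of the original example; it assumes numpy is available as np and sklearn.datasets as skd, as the function body implies):

import numpy as np
import sklearn.datasets as skd

Y0, X0, X1 = gaussian_dd(n_samples=1000)
print(Y0.shape, X0.shape, X1.shape)      # (1000, 2) for each dataset
print(Y0.mean(axis=0), X1.mean(axis=0))  # roughly [-2, -2] and [5, 5]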
Example 2
    def __init__(self, rng, n_samples=200, n_components=2, n_features=2,
                 scale=50):
        self.n_samples = n_samples
        self.n_components = n_components
        self.n_features = n_features

        self.weights = rng.rand(n_components)
        self.weights = self.weights / self.weights.sum()
        self.means = rng.rand(n_components, n_features) * scale
        self.covariances = {
            'spherical': .5 + rng.rand(n_components),
            'diag': (.5 + rng.rand(n_components, n_features)) ** 2,
            'tied': make_spd_matrix(n_features, random_state=rng),
            'full': np.array([
                make_spd_matrix(n_features, random_state=rng) * .5
                for _ in range(n_components)])}
        self.precisions = {
            'spherical': 1. / self.covariances['spherical'],
            'diag': 1. / self.covariances['diag'],
            'tied': linalg.inv(self.covariances['tied']),
            'full': np.array([linalg.inv(covariance)
                             for covariance in self.covariances['full']])}

        self.X = dict(zip(COVARIANCE_TYPE, [generate_data(
            n_samples, n_features, self.weights, self.means, self.covariances,
            covar_type) for covar_type in COVARIANCE_TYPE]))
        self.Y = np.hstack([np.full(int(np.round(w * n_samples)), k,
                                    dtype=int)
                            for k, w in enumerate(self.weights)])
Example 3
def main():

    p1 = make_spd_matrix(2)
    p2 = make_spd_matrix(2)
    p3 = avg_func(p1, p2, 0.5)

    plot_data(p1, p2, p3)

    return
Example 4
    def __init__(self,
                 rng,
                 n_samples=200,
                 n_components=2,
                 n_features=2,
                 scale=50):
        self.n_samples = n_samples
        self.n_components = n_components
        self.n_features = n_features

        self.weights = rng.rand(n_components)
        self.weights = self.weights / self.weights.sum()
        self.means = rng.rand(n_components, n_features) * scale
        self.covariances = {
            "spherical": 0.5 + rng.rand(n_components),
            "diag": (0.5 + rng.rand(n_components, n_features)) ** 2,
            "tied": make_spd_matrix(n_features, random_state=rng),
            "full": np.array([
                make_spd_matrix(n_features, random_state=rng) * 0.5
                for _ in range(n_components)
            ]),
        }
        self.precisions = {
            "spherical": 1.0 / self.covariances["spherical"],
            "diag": 1.0 / self.covariances["diag"],
            "tied": linalg.inv(self.covariances["tied"]),
            "full": np.array([
                linalg.inv(covariance)
                for covariance in self.covariances["full"]
            ]),
        }

        self.X = dict(
            zip(
                COVARIANCE_TYPE,
                [generate_data(n_samples, n_features, self.weights,
                               self.means, self.covariances, covar_type)
                 for covar_type in COVARIANCE_TYPE],
            ))
        self.Y = np.hstack([
            np.full(int(np.round(w * n_samples)), k, dtype=int)
            for k, w in enumerate(self.weights)
        ])
Example 5
    def test_Metric(self):
        np.random.seed(28)
        for d in [iris, wine, breast_cancer]:
            X, y = d()
            n, d = X.shape
            M = make_spd_matrix(d)

            metric = Metric(M)
            metric.fit(X, y)
            L = metric.transformer()
            assert_array_almost_equal(L.T.dot(L), M)

            LX1 = metric.transform()
            LX2 = metric.transform(X)

            dl1 = pdist(LX1)
            dl2 = pdist(LX2)
            dm = pdist(X, metric='mahalanobis',
                       VI=M)  # checking that d_M = d_L

            assert_array_almost_equal(dm, dl1)
            assert_array_almost_equal(dm, dl2)

            d_, d = L.shape
            e_, e = M.shape

            assert_equal(d, e_)
            assert_equal(d, e)
            assert_equal(d, X.shape[1])
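The assertions above rely on a standard identity: if M = Lᵀ L, then Euclidean distances after the linear map x ↦ L x equal Mahalanobis distances with VI=M. A self-contained sketch of that identity (assuming only numpy, scipy and scikit-learn):

import numpy as np
from scipy.spatial.distance import pdist
from sklearn.datasets import make_spd_matrix

M = make_spd_matrix(4, random_state=0)
L = np.linalg.cholesky(M).T                  # upper factor, so L.T @ L == M
X = np.random.RandomState(0).randn(20, 4)
assert np.allclose(pdist(X @ L.T), pdist(X, metric='mahalanobis', VI=M))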
Example 6
def generate_data_bivariate(request):
    n_p = request.param
    np.random.seed(1111)
    # setting parameters
    n = n_p[0]
    p = n_p[1]
    theta = np.array([0.5, 0.9])
    b = [1 / k for k in range(1, p + 1)]
    sigma = make_spd_matrix(p)

    # generating data
    x = np.random.multivariate_normal(np.zeros(p), sigma, size=[n, ])
    G = _g(np.dot(x, b))
    M0 = _m(np.dot(x, b))
    M1 = _m2(np.dot(x, b))
    D0 = M0 + np.random.standard_normal(size=[n, ])
    D1 = M1 + np.random.standard_normal(size=[n, ])
    y = theta[0] * D0 + theta[1] * D1 + G + np.random.standard_normal(size=[n, ])
    d = np.column_stack((D0, D1))
    column_names = [f'X{i + 1}' for i in np.arange(p)] + ['y'] + \
                   [f'd{i + 1}' for i in np.arange(2)]
    data = pd.DataFrame(np.column_stack((x, y, d)),
                        columns=column_names)

    return data
Example 7
def generate_general(nv, m, ns, normalize=False, shuffle=False):
    """ Generate general data using make_spd_matrix() function.

    :param nv:        Number of observed variables
    :param m:         Number of latent factors
    :param ns:        Number of samples for each time step
    :param normalize: Whether to set Var[x] = 1
    :param shuffle:   Whether to shuffle the x_i's
    :return: (data, ground_truth_cov)
    """
    assert nv % m == 0
    b = nv // m  # block size

    sigma = np.zeros((nv, nv))
    for i in range(m):
        block_cov = make_spd_matrix(b)
        if normalize:
            std = np.sqrt(block_cov.diagonal()).reshape((b, 1))
            block_cov /= std
            block_cov /= std.T
        sigma[i * b:(i + 1) * b, i * b:(i + 1) * b] = block_cov

    if shuffle:
        perm = list(range(nv))
        random.shuffle(perm)
        sigma_perm = np.zeros((nv, nv))
        for i in range(nv):
            for j in range(nv):
                sigma_perm[i, j] = sigma[perm[i], perm[j]]
        sigma = sigma_perm

    mu = np.zeros((nv,))
    return np.random.multivariate_normal(mu, sigma, size=(ns,)), sigma
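A usage sketch for generate_general (assumptions: numpy as np, the random module, and make_spd_matrix are imported at module level, as the function body implies):

# The empirical covariance should approximate the returned block-diagonal
# ground truth when ns is large.
data, cov = generate_general(nv=6, m=3, ns=5000, normalize=True)
print(data.shape)                 # (5000, 6)
emp = np.cov(data, rowvar=False)
print(np.abs(emp - cov).max())    # small for large ns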
Example 8
def generate_data_iivm_binary(request):
    n_p = request.param
    np.random.seed(1111)
    # setting parameters
    n = n_p[0]
    p = n_p[1]
    theta = 0.5
    b = [1 / k for k in range(1, p + 1)]
    sigma = make_spd_matrix(p)

    # generating data
    x = np.random.multivariate_normal(np.zeros(p), sigma, size=[n, ])
    G = _g(np.dot(x, b))

    prz = 1 / (1 + np.exp((-1) * (x[:, 0] * (-1) * b[4] + x[:, 1] * b[2]
                                  + np.random.standard_normal(size=[n, ]))))
    z = np.random.binomial(p=prz, n=1, size=[n, ])
    u = np.random.standard_normal(size=[n, ])
    pr = 1 / (1 + np.exp((-1) * (0.5 * z + x[:, 0] * (-0.5) + x[:, 1] * 0.25
                                 - 0.5 * u
                                 + np.random.standard_normal(size=[n, ]))))
    d = np.random.binomial(p=pr, n=1, size=[n, ])
    err = np.random.standard_normal(n)

    pry = 1 / (1 + np.exp((-1) * theta * d + G + 4 * u + err))
    y = np.random.binomial(p=pry, n=1, size=[n, ])

    return x, y, d, z
Example 9
    def init_norm_mix(self, nnorm=4):
        # init normal mixture (hardest)
        weights = np.random.uniform(1, 10, size=nnorm)
        weights /= sum(weights)
        if self.d > 10:
            scale = [2] * 10 + ([.1] * (self.d - 10))
        else:
            scale = 2
        meanm = [
            np.random.normal(loc=0, scale=scale, size=self.d)
            for i in range(nnorm - 1)
        ]
        meanm.append(-np.sum(meanm, axis=0))
        covm = [make_spd_matrix(self.d) for i in range(nnorm - 1)]

        comb = self.incomplete_comb(weights, meanm, covm)
        fac = .9 / max(np.linalg.eig(comb)[0])
        meanm = np.multiply(meanm, np.sqrt(fac))
        covm = np.multiply(covm, fac)

        comb = self.incomplete_comb(weights, meanm, covm)
        I = self.variance * np.eye(self.d)
        lastCov = (I - comb) / weights[-1]
        lastCov.shape = (1, ) + lastCov.shape
        covm = np.append(covm, lastCov, axis=0)

        self.weights = weights.flatten()
        self.means = meanm
        self.covs = covm
Example 10
def main():
    np.random.seed(12)
    data_dim = 8
    n_data = 10
    threshold_missing = 0.5
    mu = np.random.randn(data_dim, 1)
    sigma = make_spd_matrix(
        n_dim=data_dim)  # generate a random symmetric positive-definite matrix
    # test if the matrix is positive definite
    # print(is_pos_def(sigma))
    x_full = gauss.gauss_sample(mu, sigma, n_data)
    missing = np.random.rand(n_data, data_dim) < threshold_missing
    x_miss = np.copy(x_full)
    x_miss[missing] = np.nan
    x_imputed = gauss.gauss_impute(mu, sigma, x_miss)
    #Create a matrix from x_miss by replacing the NaNs with 0s to display the hinton_diagram
    xmiss0 = np.copy(x_miss)
    for g in np.argwhere(np.isnan(x_miss)):
        xmiss0[g[0], g[1]] = 0

    plot_1 = plt.figure(1)
    pml.hinton_diagram(xmiss0, ax=plot_1.gca())
    plot_1.suptitle('Observed')
    pml.savefig("gauss_impute_observed.pdf", dpi=300)

    plot_2 = plt.figure(2)
    pml.hinton_diagram(x_full, ax=plot_2.gca())
    plot_2.suptitle('Hidden truth')
    pml.savefig("gauss_impute_truth.pdf", dpi=300)

    plot_3 = plt.figure(3)
    pml.hinton_diagram(x_imputed, ax=plot_3.gca())
    plot_3.suptitle('Imputation with true params')
    pml.savefig("gauss_impute_pred.pdf", dpi=300)
    plt.show()
Example 11
    def Initialize_parameters(self, data_points):
        '''
        Randomly initializes the parameters of each Gaussian distribution.
        It initializes n_clusters mean values of the same dimension as the datapoints.
        It initializes n_clusters covariance matrices, each a random symmetric
        positive-definite square matrix of the dimension of the datapoints.
        It initializes n_clusters weights, one per distribution; the weights sum to one.
        
        Parameters
        ----------
        data_points : ndarray
            A 2D numpy array of shape (n_datapoints, n_dimensions).

        Returns
        -------
        None.

        '''
        from sklearn.datasets import make_spd_matrix
        self.means = np.random.permutation(data_points)[:self.n_clusters]
        self.covariances = np.zeros(
            (self.n_clusters, data_points.shape[-1], data_points.shape[-1]))
        for c in range(self.n_clusters):
            self.covariances[c] = make_spd_matrix(data_points.shape[-1])
        self.weights = np.random.dirichlet(np.ones(self.n_clusters), size=1)[0]
        self.r = np.zeros((self.n_clusters, data_points.shape[0]))
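A standalone sketch of the same initialization scheme outside the class (the array shapes are hypothetical; only numpy and scikit-learn are assumed):

import numpy as np
from sklearn.datasets import make_spd_matrix

data_points = np.random.randn(100, 3)     # hypothetical dataset
n_clusters = 4
means = np.random.permutation(data_points)[:n_clusters]
covariances = np.array([make_spd_matrix(data_points.shape[-1])
                        for _ in range(n_clusters)])
weights = np.random.dirichlet(np.ones(n_clusters), size=1)[0]
assert np.isclose(weights.sum(), 1.0)     # mixture weights sum to one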
Example 12
def convex_quad_min():
    """Returns objective, gradient, Hessian, solution for a convex QP.

    Returns
    -------
    fobj : function
        Convex, quadratic objective function
    fgrad : function
        Gradient of the objective
    fhess : function
        Hessian of the objective (constant)
    sol : numpy.ndarray
        Global minimizer of the function
    """
    # PRNG seed
    _seed = 7
    # number of features/dimensionality, PRNG
    n_dim = 10
    rng = np.random.default_rng(_seed)
    # make a positive-definite Hessian by adding a scaled identity matrix
    hess = make_spd_matrix(n_dim, random_state=_seed)
    hess += 1e-4 * np.eye(n_dim)
    # random linear terms drawn from [-5, 5]
    coef = rng.uniform(low=-5., high=5., size=n_dim)
    # objective function, gradient, and hessian
    fobj = lambda x: 0.5 * x @ hess @ x + coef @ x
    fgrad = lambda x: hess @ x + coef
    fhess = lambda x: hess
    # compute solution using scipy.linalg.solve
    sol = linalg.solve(hess, -coef, check_finite=False, assume_a="pos")
    # return fobj, fgrad, fhess, sol
    return fobj, fgrad, fhess, sol
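A quick check sketch for convex_quad_min (assuming the imports its body uses: numpy as np, scipy's linalg, and sklearn's make_spd_matrix):

# At the minimizer of 0.5 * x @ H @ x + c @ x the gradient H @ x + c vanishes.
fobj, fgrad, fhess, sol = convex_quad_min()
print(np.allclose(fgrad(sol), 0.0, atol=1e-8))   # True
print(fobj(sol) <= fobj(sol + 0.1))              # any perturbation increases f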
Example 13
    def _initialize_core(self, mu=None, sigma=None, delta=None):
        """
		Initialize a Core within the data space.

		Parameters
		----------
		mu : array-like, shape (n_features,), default=None
			Mean of the Core.

		sigma : array-like, shape (n_features, n_features), default=None
			Covariance of the Core.

		delta : array-like, shape (n_features,), default=None
			Weight of the Core.

		Returns
		-------
		core : Core
			A Core within the data space given by `data`.
		"""
        if mu is not None and sigma is not None and delta is not None:
            return Core(mu=mu, sigma=sigma, delta=delta)
        elif self._data_range is not None:
            mu = self.random_state.rand(self.dim) * \
              (self._data_range[1] - self._data_range[0]) + \
              self._data_range[0]
            sigma = make_spd_matrix(self.dim)
            delta = np.ones((1)) / self.init_cores
            return Core(mu=mu, sigma=sigma, delta=delta)
        else:
            raise RuntimeError(
                "Data Range hasn't been set, likely because GMM hasn't been initialized yet"
            )
Example 14
def toy_data():
    name1 = "A"
    n1 = 10
    x1 = np.arange(n1)
    y1 = np.random.random(n1)

    name2 = "B"
    n2 = 20
    x2 = np.arange(n2)
    y2 = np.random.random(n2)

    name3 = "C"
    n3 = 30
    x3 = np.arange(n3)
    y3 = np.random.random(n3)

    # Generate arbitrary covariance matrix, partition into parts
    full_cov = make_spd_matrix(n1 + n2 + n3, random_state=1234)
    cov1 = full_cov[:n1, :n1]
    cov2 = full_cov[n1:n1 + n2, n1:n1 + n2]
    cov3 = full_cov[n1 + n2:, n1 + n2:]

    data1 = GaussianData(name1, x1, y1, cov1)
    data2 = GaussianData(name2, x2, y2, cov2)
    data3 = GaussianData(name3, x3, y3, cov3)

    cross_cov = CrossCov({
        (name1, name2): full_cov[:n1, n1:n1 + n2],
        (name1, name3): full_cov[:n1, n1 + n2:],
        (name2, name3): full_cov[n1:n1 + n2, n1 + n2:],
    })

    return [data1, data2, data3], cross_cov
Example 15
def run_test_conv(in_features, n_iterations=1000, size=1000, printing_step=10):

    coder = Convolution_Autoencoder(in_features**2)
    dataset = [FloatTensor(make_spd_matrix(in_features)).unsqueeze(0) for _ in range(size)]

    dataset = Variable(torch.cat(dataset, 0).reshape(size, 1, in_features, in_features))

    test_size = 30

    train_dataset = dataset[:-test_size]
    test_dataset = dataset[-test_size:]

    optimizer = optim.Adam(coder.parameters(), lr=0.1)
    loss_function = nn.MSELoss()

    for epoch in tqdm(range(1, n_iterations)):
        optimizer.zero_grad()

        outputs_train = coder(train_dataset)
        outputs_test = coder(test_dataset)

        loss_train = loss_function(outputs_train, train_dataset)
        loss_test = loss_function(outputs_test, test_dataset)

        loss_train.backward(retain_graph=True)
        if epoch % printing_step == 0:
            print("EPOCH: {0}, TRAIN LOSS: {1}, TEST LOSS: {2}".format(
                epoch, loss_train.item(), loss_test.item()))

        if epoch == 1000:
            # decay the learning rate by a factor of 10 after 1000 epochs
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.1
        optimizer.step()

    return coder, dataset
Example 16
def run_detetmenant(in_features, n_iterations=100, size=100):
    coder = DetNet(in_features)
    dataset = [FloatTensor(make_spd_matrix(in_features)).unsqueeze(0) for _ in range(size)]

    dataset = Variable(torch.cat(dataset, 0))

    test_size = 30

    train_dataset = dataset[:-test_size]
    test_dataset = dataset[-test_size:]

    optimizer = optim.Adam(coder.parameters(), lr=0.1)
    criterion = MSELoss()

    for epoch in tqdm(range(1, n_iterations)):
        optimizer.zero_grad()

        outputs_train = coder(train_dataset)
        outputs_test = coder(test_dataset)

        loss_train = criterion(outputs_train, train_dataset)
        loss_test = criterion(outputs_test, test_dataset)

        loss_train.backward(retain_graph=True)
        if epoch % 10 == 0:
            print("EPOCH: {0}, TRAIN LOSS: {1}, TEST LOSS: {2}".format(
                epoch, loss_train.item(), loss_test.item()))
        optimizer.step()


    return coder, dataset
Example 17
    def test_forest(self):
        """
        Tests the forest construction by firstly ensuring the MSTs are identical when 
        the correlation matrix only has unique edges, and secondly when the correlation
        matrix is degenerate
        """
        p = 10
        mean = np.zeros(p)
        M = make_spd_matrix(p)
        X = np.random.multivariate_normal(mean, M, 200)
        corr = np.corrcoef(X.T)
        nodes = list(np.arange(p))
        mst = topcorr.mst(corr)
        forest = topcorr.mst_forest(corr)

        M_mst = nx.to_numpy_array(mst, nodes)
        M_forest = nx.to_numpy_array(forest, nodes)

        assert_array_almost_equal(M_mst, M_forest)

        example_mat = np.array([[0.0, 0.1, 0.3, 0.2, 0.1],
                                [0.1, 0.0, 0.3, 0.4, 1.7],
                                [0.3, 0.3, 0.0, 0.6, 0.5],
                                [0.2, 0.4, 0.6, 0.0, 0.2],
                                [0.1, 1.7, 0.5, 0.2, 0.0]])
        example_corr = 1 - np.power(example_mat, 2) / 2
        forest = topcorr.mst_forest(example_corr)
        mst = topcorr.mst(example_corr)
        forest_edges = len(forest.edges)
        mst_edges = len(mst.edges)

        assert (forest_edges > mst_edges)
        assert (nx.is_connected(forest))
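For context, a sketch of the convention the example_corr construction inverts (an assumption about the intended metric, common for correlation-based MSTs):

# The usual correlation-to-distance map is d = sqrt(2 * (1 - rho));
# inverting it gives rho = 1 - d**2 / 2, which is how example_corr is built.
import numpy as np
d = np.array([0.1, 0.3, 0.2])
rho = 1 - d ** 2 / 2
assert np.allclose(np.sqrt(2 * (1 - rho)), d)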
Example 18
def AlphaPrior():
    mean_vec = alpha_mean_vec
    cov_mat = make_spd_matrix(n_dim=2)  # the covariance matrix must be SPD
    Alpha = dict()
    Alpha['Value'] = rand.multivariate_normal(mean=mean_vec, cov=cov_mat)
    Alpha['Mean'] = mean_vec
    Alpha['Cov'] = cov_mat
    return Alpha
Example 19
def make_covar_matrix(covariance_type,
                      n_components,
                      n_features,
                      random_state=None):
    mincv = 0.1
    prng = check_random_state(random_state)
    if covariance_type == 'spherical':
        return (mincv + mincv * prng.random_sample((n_components, )))**2
    elif covariance_type == 'tied':
        return (make_spd_matrix(n_features) + mincv * np.eye(n_features))
    elif covariance_type == 'diag':
        return (mincv + mincv * prng.random_sample(
            (n_components, n_features)))**2
    elif covariance_type == 'full':
        return np.array([(make_spd_matrix(n_features, random_state=prng) +
                          mincv * np.eye(n_features))
                         for x in range(n_components)])
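A shape-check sketch for make_covar_matrix (assuming numpy as np, make_spd_matrix and check_random_state are imported, as the function requires):

# Expected output shapes per covariance_type, for 3 components and 4 features.
for ct, shape in [('spherical', (3,)), ('tied', (4, 4)),
                  ('diag', (3, 4)), ('full', (3, 4, 4))]:
    assert make_covar_matrix(ct, 3, 4, random_state=0).shape == shape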
Example 20
def normal_data_with_cov(nrObservations, nrFeatures, seed):
    
    np.random.seed(seed)
    cov_mat = dts.make_spd_matrix(nrFeatures)
    mean_array = np.zeros(nrFeatures)
    data = np.random.multivariate_normal(mean = mean_array, cov = cov_mat, size=nrObservations)
    
    return data
Example 21
def main():
    N = 800
    d = 2
    np.random.seed(20)
    k = 3
    w = np.random.rand(k)
    w = w / np.sum(w)
    print(w)
    Z = generateZ(d, k)
    index = [i for i in range(k)]
    #print(index)
    print(Z)
    X = np.zeros(N)
    Y = np.zeros(N)
    #mu = []
    Z_idxs = []
    Xn = []
    for j in range(N):
        z_idx = np.random.choice(index, p=w)
        #print(z_idx)
        mu_idx, sigma_idx = Z[z_idx]
        #mu.append(np.random.rand())
        #cov.append(sigma_idx)
        Z_idxs.append(z_idx)
        X[j], Y[j] = np.random.multivariate_normal(mu_idx,
                                                   sigma_idx,
                                                   check_valid='warn')
        #print("x = ",X[j],", y = ",Y[j])
        x_j = np.array([X[j], Y[j]])
        Xn.append(x_j)
    plt.scatter(X, Y)
    Mu = np.random.random((k, d))
    print(Mu)
    Cov = []

    for i in range(k):

        seed = i + 10
        z = make_spd_matrix(d, random_state=seed)  #(z_sym + z_sym.T)/2
        #mu = np.random.rand(d)
        Cov.append(z)


#    print(Cov)
    Cov = np.array(Cov)
    print(Cov)
    w = np.random.random(k)
    #    print(w)
    w = w / np.sum(w)
    print(w)
    Xn = np.array(Xn)
    #    print(Xn)
    start_time = time.perf_counter()
    #    get_E_step(Xn, Mu, Cov, w, d, k, N)
    EM_algo(Xn, k, Mu, Cov, w, d, N)
    end_time = time.perf_counter()
    #    print(log_like)
    print(end_time - start_time, "s")
Example 22
def gen_data(dim, N_size):
    from random import choice
    from sklearn.datasets import make_spd_matrix
    # Define the number of samples
    num_samples = N_size
    C = make_spd_matrix(dim, random_state=123) + 0.000001 * np.identity(dim)
    mean = [0 for i in range(dim)]
    X = np.random.multivariate_normal(mean, C, num_samples)
    return X, C
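The tiny identity term added above is a common numerical-stability jitter. A sketch of a practical positive-definiteness check (assuming the function's own imports):

import numpy as np
X, C = gen_data(dim=5, N_size=100)
np.linalg.cholesky(C)     # raises LinAlgError if C were not positive definite
print(X.shape, C.shape)   # (100, 5) (5, 5)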
Example 23
def make_spd_matrices_4d(num_samples, num_data_points, num_dimensions,
                         random_state):
    matrices = np.zeros(
        (num_samples, num_data_points, num_dimensions, num_dimensions))
    for i in range(num_samples):
        for j in range(num_data_points):
            matrices[i, j, :, :] = make_spd_matrix(num_dimensions,
                                                   random_state=random_state)
    return matrices
Example 24
def make_lqr(state_size, action_size):
    n_dim = state_size + action_size

    F = np.random.normal(size=(state_size, n_dim))
    f = np.random.normal(size=(state_size, 1))

    C = make_spd_matrix(n_dim)
    c = np.random.normal(size=(n_dim, 1))

    return LQR(F, f, C, c)
Example 25
def test_make_spd_matrix():
    X = make_spd_matrix(n_dim=5, random_state=0)

    assert_equal(X.shape, (5, 5), "X shape mismatch")
    assert_array_almost_equal(X, X.T)

    from numpy.linalg import eig
    eigenvalues, _ = eig(X)
    assert_array_equal(eigenvalues > 0, np.array([True] * 5),
                       "X is not positive-definite")
Example 27
    def test_one_class(self):
        # if there is only one class, the gradient is null, so the final
        # matrix must remain equal to the initialization
        X = self.iris_points[self.iris_labels == 0]
        y = self.iris_labels[self.iris_labels == 0]

        A = make_spd_matrix(X.shape[1], X.shape[1])
        nca = NCA(init=A, max_iter=30, n_components=X.shape[1])
        nca.fit(X, y)
        assert_array_equal(nca.components_, A)
Example 28
def make_covar_matrix(covariance_type, n_states, n_features, random_state=None):
    mincv = 0.1
    prng = check_random_state(random_state)
    if covariance_type == "spherical":
        return (mincv + mincv * prng.random_sample((n_states,))) ** 2
    elif covariance_type == "tied":
        return make_spd_matrix(n_features) + mincv * np.eye(n_features)
    elif covariance_type == "diagonal":
        return (mincv + mincv * prng.random_sample((n_states, n_features))) ** 2
    elif covariance_type == "full":
        return np.array(
            [
                (
                    make_spd_matrix(n_features, random_state=prng)
                    + mincv * np.eye(n_features)
                )
                for _ in range(n_states)
            ]
        )
Example 29
    def __init__(self, data, clusters, color="test", max_itr=400, eps=1e-8):
        self.train_data = data
        self.clusters = clusters
        self.color = self.color2pixel(color)
        self.max_itr = max_itr
        self.eps = eps
        self.weights = np.ones(self.clusters) / self.clusters
        self.means = np.random.choice(data.flatten(),
                                      (self.clusters, data.shape[-1]))
        self.cov = np.array(
            [make_spd_matrix(data.shape[-1]) for i in range(self.clusters)])
Example 30
def generateZ(d, k):
    Z = []
    mu = np.array([[0.3, 4], [5.5, 0.25], [7, 7]])
    for i in range(k):
        #z_sym = np.random.rand(d,d)
        #z = (np.tril(z_sym) + np.tril(z_sym).T)/2
        seed = i + 10
        z = make_spd_matrix(d, random_state=seed)  #(z_sym + z_sym.T)/2
        #mu = np.random.rand(d)
        Z.append((mu[i], z))
    return Z
Example 31
def sim_Unobs_Data(xDim, nSim, p_AgivenZ, p_AgivenNotZ):
    #** Follow the graph to generate Y
    # Y = X * beta + U * gamma + Z * CATE

    #** Add a U to X, A and Y
    # confounders
    x_Sigma = make_spd_matrix(xDim)
    X = np.random.multivariate_normal(np.zeros(xDim), x_Sigma, size=nSim)
    beta = np.random.choice(5, xDim, replace=True, p=[.3, .25, .2, .15, .1])

    U = np.random.normal(0.5 * np.ones(nSim), 1)
    gamma = 2

    # ground truth CATE
    CATE = np.array([1, 2, 3, 4])
    p_G = np.exp(X[:, 1]) / (1 + np.exp(X[:, 1]))
    Group = np.zeros(nSim)
    Group[p_G < 0.75] = 1
    Group[p_G < 0.5] = 2
    Group[p_G < 0.25] = 3
    Group = Group.astype(int)

    # Z encourage A, and split-treatment criterion p_A_given_Z_X > p_A_given_notZ_X
    p_A_given_Z = [p_AgivenZ for x in X]  #if x[0] > 0 else p_AgivenZ-0.1
    p_A_given_notZ = [p_AgivenNotZ for x in X]
    Compliance = np.array(p_A_given_Z) - np.array(p_A_given_notZ)
    print('Avg compliance:', np.mean(Compliance))

    # randomized treatment
    Z = np.random.choice(2, nSim)

    A = [
        np.random.choice(2, 1, p=[1 - p_A_given_Z[i], p_A_given_Z[i]])
        if Z[i] == 1 else np.random.choice(
            2, 1, p=[1 - p_A_given_notZ[i], p_A_given_notZ[i]])
        for i in range(nSim)
    ]

    # ground truth two-arm potential outcomes
    Y_0 = np.random.normal(np.sum(X * beta, 1) + U * gamma, 1)
    #Y_1 = Y_0 + np.random.normal(CATE[Group],1)
    Y_1 = Y_0 + CATE[Group] / (Compliance * 0.5)

    Y = [Y_0[i] if A[i] == 0 else Y_1[i] for i in range(nSim)]

    Z = np.array(Z).ravel()
    A = np.array(A).ravel()
    Y = np.array(Y).ravel()

    #     print('Z==1:',sum(Z), 'A==Z:',sum(A*Z))

    # return full observed data
    return X, Y, A, nSim, Group, Y_0, Y_1, Z, A
Example 32
    def generate_tree(self, p, dim=50):
        """Generate a tree where each node is a leaf with probability p"""

        cov_mat = make_spd_matrix(dim)
        root = Node(cov_mat=cov_mat, index=0)
        # set the scale of the inverse chi-squared to a small fraction of the
        # last eigenvalue
        eVals = np.linalg.eigvals(cov_mat)
        self.scale = eVals[-1] / 100
        self.df = 4
        self.root = root
        self.n_nodes = 1
        self.generate_from_node(root, p)
Example 33
def laplacian_test():
    from sklearn.datasets import make_sparse_spd_matrix, make_spd_matrix
    from sklearn.preprocessing import MinMaxScaler
    from scipy.sparse import csr_matrix, linalg
    import time as time
    import matplotlib.pyplot as plt
    plt.style.use('ggplot')


    A = csr_matrix(make_spd_matrix(100))

    L, D, t = [], [], []
    for method in ['personal', 'sklearn']:
        t0 = time.time()
        temp_L, temp_D = create_laplacian(A)
        t1 = time.time()
        L.append(temp_L); D.append(temp_D)
        t.append(t1-t0)



    fig, ax = plt.subplots(nrows=1, ncols=2)



    ax[0].spy(L[0], precision=1E-10, markersize=.2)
    ax[0].set_title('My Method; {t:.2e} secs'.format(t=t[0]))
    ax[1].spy(L[1], precision=1E-10, markersize=.2)
    ax[1].set_title('Sklearn; {t:.2e} secs'.format(t=t[1]))
    plt.show()

    print(np.shape(L[0]), np.shape(L[1]))
    tol = 1E-1
    print('Are the Laplacian matrix values close '
          'within tol: {tol}?'.format(tol=tol))

    assert np.allclose(L[0].data, L[1].data, rtol=tol), \
        "Laplacians are not the same."
    print('Test passed.')
Example 34
iris_df.fillna(iris_df.max())['sepal length (cm)'].head(5)
#0 5.1
#1 4.9
#2 4.7
#3 7.9
#4 5.0
#Name: sepal length (cm), dtype: float64



# Using Pipelines for multiple preprocessing steps

from sklearn import datasets
import numpy as np
mat = datasets.make_spd_matrix(10)
masking_array = np.random.binomial(1, .1, mat.shape).astype(bool)
# mask out entries to simulate missing data
mat[masking_array] = np.nan
mat[:4, :4]
#array([[ 0.56716186, -0.20344151, nan, -0.22579163],
#[ nan, 1.98881836, -2.25445983, 1.27024191],
#[ 0.29327486, -2.25445983, 3.15525425, -1.64685403],
#[-0.22579163, 1.27024191, -1.64685403, 1.32240835]])

# without using a pipeline
from sklearn import preprocessing
impute = preprocessing.Imputer()
scaler = preprocessing.StandardScaler()
mat_imputed = impute.fit_transform(mat)
mat_imputed[:4, :4]
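A sketch of the pipeline counterpart the "without using a pipeline" comment alludes to (note: preprocessing.Imputer comes from older scikit-learn; modern releases use sklearn.impute.SimpleImputer instead):

# Chain the same two steps; fit_transform imputes and then scales in one call.
from sklearn import pipeline
pipe = pipeline.Pipeline([('impute', impute), ('scaler', scaler)])
new_mat = pipe.fit_transform(mat)
new_mat[:4, :4]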