Example #1
def test_glmmexpfam_qs_none():
    nsamples = 10

    random = RandomState(0)
    X = random.randn(nsamples, 5)
    K = linear_eye_cov().value()
    z = random.multivariate_normal(0.2 * ones(nsamples), K)

    ntri = random.randint(1, 30, nsamples)
    nsuc = zeros(nsamples, dtype=int)
    for (i, ni) in enumerate(ntri):
        nsuc[i] += sum(z[i] + 0.2 * random.randn(ni) > 0)

    ntri = ascontiguousarray(ntri)
    glmm = GLMMExpFam(nsuc, ("binomial", ntri), X, None)

    assert_allclose(glmm.lml(), -38.30173374439622, atol=ATOL, rtol=RTOL)
    glmm.fix("beta")
    glmm.fix("scale")

    glmm.fit(verbose=False)

    assert_allclose(glmm.lml(), -32.03927471370041, atol=ATOL, rtol=RTOL)

    glmm.unfix("beta")
    glmm.unfix("scale")

    glmm.fit(verbose=False)

    assert_allclose(glmm.lml(), -19.575736561760586, atol=ATOL, rtol=RTOL)
Example #2
def cmc(g, xdists, u_to_x, T, seed, maxitr):
    """
    Crude Monte Carlo simulation.
    """

    # Seed the random number generator if required
    if seed == -1:
        prng = RandomState()
    else:
        prng = RandomState(seed)
    
    # Generate standard normal samples centered at the origin
    u0 = zeros(len(xdists))
    covmat = eye(len(xdists)) 
    u = prng.multivariate_normal(u0, covmat, size=maxitr).T
    g_mc = g(u_to_x(u, xdists, T))

    # Convert g-function output to pass/fail indicator function and estimate pf
    g_mc[g_mc>0] = 0
    g_mc[g_mc<0] = 1
    mu_pf = g_mc.mean()
    beta = -norm.ppf(mu_pf) if mu_pf < 0.5 else norm.ppf(mu_pf)

    # Convergence metrics (sample standard deviation, standard error, coefficient of variation of the estimate)
    std_pf = g_mc.std(ddof=1) # Calculate sample standard deviation
    se_pf = std_pf/sqrt(maxitr)
    cv_pf = se_pf/mu_pf

    return {'vars': xdists, 'beta': beta, 'Pf': mu_pf, 'stderr': se_pf, 
            'stdcv': cv_pf}
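A minimal self-contained sketch of the same crude Monte Carlo estimate of a failure probability, using an illustrative limit-state function g(u) = 3 - u1 - u2 in standard normal space (the function and sample size are made up here; the original relies on xdists, u_to_x and T from the surrounding library):

import numpy as np
from numpy.random import RandomState
from scipy.stats import norm

def g_demo(u):
    # Illustrative limit state: failure when g <= 0
    return 3.0 - u[0] - u[1]

prng = RandomState(42)
n = 100000
u = prng.multivariate_normal(np.zeros(2), np.eye(2), size=n).T
indicator = (g_demo(u) < 0).astype(float)    # 1 where the limit state is violated
pf = indicator.mean()                        # estimated failure probability
se = indicator.std(ddof=1) / np.sqrt(n)      # standard error of the estimate
print(pf, se, -norm.ppf(pf))                 # pf, its standard error, reliability index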
Example #3
def test_glmmexpfam_optimize():
    nsamples = 10

    random = RandomState(0)
    X = random.randn(nsamples, 5)
    K = linear_eye_cov().value()
    z = random.multivariate_normal(0.2 * ones(nsamples), K)
    QS = economic_qs(K)

    ntri = random.randint(1, 30, nsamples)
    nsuc = zeros(nsamples, dtype=int)
    for (i, ni) in enumerate(ntri):
        nsuc[i] += sum(z[i] + 0.2 * random.randn(ni) > 0)

    ntri = ascontiguousarray(ntri)
    glmm = GLMMExpFam(nsuc, ("binomial", ntri), X, QS)

    assert_allclose(glmm.lml(), -29.102168129099287, atol=ATOL, rtol=RTOL)
    glmm.fix("beta")
    glmm.fix("scale")

    glmm.fit(verbose=False)

    assert_allclose(glmm.lml(), -27.635788105778012, atol=ATOL, rtol=RTOL)

    glmm.unfix("beta")
    glmm.unfix("scale")

    glmm.fit(verbose=False)

    assert_allclose(glmm.lml(), -19.68486269551159, atol=ATOL, rtol=RTOL)
Example #4
def test_glmmexpfam_predict():

    random = RandomState(4)
    n = 100
    p = n + 1

    X = ones((n, 2))
    X[:, 1] = random.randn(n)

    G = random.randn(n, p)
    G /= G.std(0)
    G -= G.mean(0)
    G /= sqrt(p)
    K = dot(G, G.T)

    i = asarray(arange(0, n), int)
    si = random.choice(i, n, replace=False)
    ntest = int(n // 5)
    itrain = si[:-ntest]
    itest = si[-ntest:]

    Xtrain = X[itrain, :]
    Ktrain = K[itrain, :][:, itrain]

    Xtest = X[itest, :]

    beta = random.randn(2)
    z = random.multivariate_normal(dot(X, beta), 0.9 * K + 0.1 * eye(n))

    ntri = random.randint(1, 100, n)
    nsuc = zeros(n, dtype=int)
    for (i, ni) in enumerate(ntri):
        nsuc[i] += sum(z[i] + 0.2 * random.randn(ni) > 0)

    ntri = ascontiguousarray(ntri)

    QStrain = economic_qs(Ktrain)
    nsuc_train = ascontiguousarray(nsuc[itrain])
    ntri_train = ascontiguousarray(ntri[itrain])

    nsuc_test = ascontiguousarray(nsuc[itest])
    ntri_test = ascontiguousarray(ntri[itest])

    glmm = GLMMExpFam(nsuc_train, ("binomial", ntri_train), Xtrain, QStrain)
    glmm.fit(verbose=False)
    ks = K[itest, :][:, itrain]
    kss = asarray([K[i, i] for i in itest])
    pm = glmm.predictive_mean(Xtest, ks, kss)
    pk = glmm.predictive_covariance(Xtest, ks, kss)
    r = nsuc_test / ntri_test
    assert_(corrcoef([pm, r])[0, 1] > 0.8)
    assert_allclose(pk[0], 54.263705682514846)
Example #5
def test_glmmexpfam_poisson():
    from numpy import ones, stack, exp, zeros
    from numpy.random import RandomState
    from numpy_sugar.linalg import economic_qs
    from pandas import DataFrame

    random = RandomState(1)

    # sample size
    n = 30

    # covariates
    offset = ones(n) * random.randn()
    age = random.randint(16, 75, n)
    M = DataFrame(stack([offset, age], axis=1), columns=["offset", "age"])
    M["sample"] = [f"sample{i}" for i in range(n)]
    M = M.set_index("sample")

    # genetic variants
    G = random.randn(n, 4)

    # sampling the phenotype
    alpha = random.randn(2)
    beta = random.randn(4)
    eps = random.randn(n)
    y = M @ alpha + G @ beta + eps

    # Whole genotype of each sample.
    X = random.randn(n, 50)
    # Estimate a kinship relationship between samples.
    X_ = (X - X.mean(0)) / X.std(0) / sqrt(X.shape[1])
    K = X_ @ X_.T + eye(n) * 0.1
    # Update the phenotype
    y += random.multivariate_normal(zeros(n), K)
    y = (y - y.mean()) / y.std()

    z = y.copy()
    y = random.poisson(exp(z))

    M = M - M.mean(0)
    QS = economic_qs(K)
    glmm = GLMMExpFam(y, "poisson", M, QS)
    assert_allclose(glmm.lml(), -52.479557279193585)
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -34.09720756737648)
Example #6
    def mutate_transformed(
        self, sample_transformed: list, *,
        rng: RandomState,
        relscale: t.Union[float, t.Iterable[float]],
    ) -> list:
        if not isinstance(relscale, t.Iterable):
            relscale = [relscale] * self.n_dims
        cov = np.diag(relscale)
        retries = int(20 * np.sqrt(self.n_dims))
        for _ in range(retries):
            mut_sample = rng.multivariate_normal(sample_transformed, cov)
            if self.is_valid_transformed(mut_sample):
                return mut_sample
            cov *= 0.9  # make feasibility more likely
        raise RuntimeError(
            "mutation failed to produce values within bounds"
            f"\n  last mut_sample = {mut_sample}"
            f"\n  input sample    = {sample_transformed}")
Example #7
def test_glmmexpfam_poisson():
    random = RandomState(1)

    # sample size
    n = 30

    # covariates
    offset = ones(n) * random.randn()
    age = random.randint(16, 75, n)
    M = stack((offset, age), axis=1)

    # genetic variants
    G = random.randn(n, 4)

    # sampling the phenotype
    alpha = random.randn(2)
    beta = random.randn(4)
    eps = random.randn(n)
    y = M @ alpha + G @ beta + eps

    # Whole genotype of each sample.
    X = random.randn(n, 50)
    # Estimate a kinship relationship between samples.
    X_ = (X - X.mean(0)) / X.std(0) / sqrt(X.shape[1])
    K = X_ @ X_.T + eye(n) * 0.1
    # Update the phenotype
    y += random.multivariate_normal(zeros(n), K)
    y = (y - y.mean()) / y.std()

    z = y.copy()
    y = random.poisson(exp(z))

    M = M - M.mean(0)
    QS = economic_qs(K)
    glmm = GLMMExpFam(y, "poisson", M, QS)
    assert_allclose(glmm.lml(), -52.479557279193585)
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -34.09720756737648)
Example #8
def ismc(g, xdists, u_to_x, T, seed, maxitr, tol, ftol, eps):
    """
    Importance sampling Monte Carlo.
    """

    # Seed the random number generator if required
    if seed == -1:
        prng = RandomState()
    else:
        prng = RandomState(seed)
    
    # Use FORM to get estimate of u*
    u_beta = slsqp(g, xdists, u_to_x, T, maxitr, tol, ftol, eps)['u_beta']

    # Generate standard normal samples centred at u*
    covmat = eye(len(xdists))
    v = prng.multivariate_normal(u_beta, covmat, size=maxitr).T
    g_mc = g(u_to_x(v, xdists, T))
    
    # Define importance sampling weighting functions
    fv = multivariate_normal(zeros(len(xdists)), covmat).pdf
    hv = multivariate_normal(u_beta, covmat).pdf

    # Convert g-function output to pass/fail indicator function and estimate pf
    g_mc[g_mc>0] = 0
    g_mc[g_mc<0] = 1
    indfunc = g_mc * fv(v.T)/hv(v.T)
    mu_pf = indfunc.mean()
    beta = -norm.ppf(mu_pf) if mu_pf < 0.5 else norm.ppf(mu_pf)

    # Convergence metrics (sample standard deviation, standard error, coefficient of variation of the estimate)
    std_pf = indfunc.std(ddof=1) # Calculate sample standard deviation
    se_pf = std_pf/sqrt(maxitr)
    cv_pf = se_pf/mu_pf

    return {'vars': xdists, 'beta': beta, 'Pf': mu_pf, 'stderr': se_pf, 
            'stdcv': cv_pf}
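For comparison with cmc above, a self-contained sketch of the importance-sampling reweighting step, reusing the illustrative limit state from the sketch after Example #2 and an assumed design point u* (the real code obtains u* from FORM via slsqp):

import numpy as np
from numpy.random import RandomState
from scipy.stats import multivariate_normal, norm

def g_demo(u):
    return 3.0 - u[0] - u[1]

prng = RandomState(42)
n = 50000
u_star = np.array([1.5, 1.5])                 # assumed design point (illustrative)
cov = np.eye(2)
v = prng.multivariate_normal(u_star, cov, size=n).T
indicator = (g_demo(v) < 0).astype(float)
f = multivariate_normal(np.zeros(2), cov).pdf   # target density
h = multivariate_normal(u_star, cov).pdf        # sampling density
weights = indicator * f(v.T) / h(v.T)
pf = weights.mean()
print(pf, weights.std(ddof=1) / np.sqrt(n), -norm.ppf(pf))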
Example #9
def test_glmmexpfam_copy():
    nsamples = 10

    random = RandomState(0)
    X = random.randn(nsamples, 5)
    K = linear_eye_cov().value()
    z = random.multivariate_normal(0.2 * ones(nsamples), K)
    QS = economic_qs(K)

    ntri = random.randint(1, 30, nsamples)
    nsuc = zeros(nsamples, dtype=int)
    for (i, ni) in enumerate(ntri):
        nsuc[i] += sum(z[i] + 0.2 * random.randn(ni) > 0)

    ntri = ascontiguousarray(ntri)
    glmm0 = GLMMExpFam(nsuc, ("binomial", ntri), X, QS)

    assert_allclose(glmm0.lml(), -29.10216812909928, atol=ATOL, rtol=RTOL)
    glmm0.fit(verbose=False)

    v = -19.575736562427252
    assert_allclose(glmm0.lml(), v)

    glmm1 = glmm0.copy()
    assert_allclose(glmm1.lml(), v)

    glmm1.scale = 0.92
    assert_allclose(glmm0.lml(), v, atol=ATOL, rtol=RTOL)
    assert_allclose(glmm1.lml(), -30.832831740038056, atol=ATOL, rtol=RTOL)

    glmm0.fit(verbose=False)
    glmm1.fit(verbose=False)

    v = -19.575736562378573
    assert_allclose(glmm0.lml(), v)
    assert_allclose(glmm1.lml(), v)
Example #10
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier

from skTMVA import convert_bdt_sklearn_tmva

import cPickle

import numpy as np
from numpy.random import RandomState

RNG = RandomState(21)

# Construct an example dataset for binary classification
n_vars = 2
n_events = 10000
signal = RNG.multivariate_normal(
    np.ones(n_vars), np.diag(np.ones(n_vars)), n_events)
background = RNG.multivariate_normal(
    np.ones(n_vars) * -1, np.diag(np.ones(n_vars)), n_events)
X = np.concatenate([signal, background])
y = np.ones(X.shape[0])
w = RNG.randint(1, 10, n_events * 2)
y[signal.shape[0]:] *= -1
permute = RNG.permutation(y.shape[0])
X = X[permute]
y = y[permute]

# Use all dataset for training
X_train, y_train, w_train = X, y, w

# Declare BDT - we are going to use AdaBoost Decision Tree
dt = DecisionTreeClassifier(max_depth=3,
Example #11
    def _test_linear_mixed_model_low_rank(self):
        seed = 0
        n_populations = 8
        fst = n_populations * [.9]
        n_samples = 500
        n_variants = 200
        n_orig_markers = 100
        n_culprits = 10
        n_covariates = 3
        sigma_sq = 1
        tau_sq = 1

        from numpy.random import RandomState
        prng = RandomState(seed)

        x = np.hstack((np.ones(shape=(n_samples, 1)),
                       prng.normal(size=(n_samples, n_covariates - 1))))

        mt = hl.balding_nichols_model(n_populations=n_populations,
                                      n_samples=n_samples,
                                      n_variants=n_variants,
                                      fst=fst,
                                      af_dist=hl.rand_unif(0.1, 0.9, seed=seed),
                                      seed=seed)

        pa_t_path = utils.new_temp_file(suffix='bm')
        a_t_path = utils.new_temp_file(suffix='bm')

        BlockMatrix.write_from_entry_expr(mt.GT.n_alt_alleles(), a_t_path)

        a = BlockMatrix.read(a_t_path).T.to_numpy()
        g = a[:, -n_orig_markers:]
        g_std = self._filter_and_standardize_cols(g)

        n_markers = g_std.shape[1]

        k = (g_std @ g_std.T) * n_samples / n_markers

        beta = np.arange(n_covariates)
        beta_stars = np.array([1] * n_culprits)

        y = prng.multivariate_normal(
            np.hstack((a[:, 0:n_culprits], x)) @ np.hstack((beta_stars, beta)),
            sigma_sq * k + tau_sq * np.eye(n_samples))

        # low rank computation of S, P
        l = g_std.T @ g_std
        sl, v = np.linalg.eigh(l)
        n_eigenvectors = int(np.sum(sl > 1e-10))
        sl = sl[-n_eigenvectors:]
        v = v[:, -n_eigenvectors:]
        s = sl * (n_samples / n_markers)
        p = (g_std @ (v / np.sqrt(sl))).T

        # compare with full rank S, P
        sk0, uk = np.linalg.eigh(k)
        sk = sk0[-n_eigenvectors:]
        pk = uk[:, -n_eigenvectors:].T
        assert np.allclose(sk, s)
        assert np.allclose(np.abs(pk), np.abs(p))

        # build and fit model
        py = p @ y
        px = p @ x
        pa = p @ a

        model = LinearMixedModel(py, px, s, y, x)
        assert model.n == n_samples
        assert model.f == n_covariates
        assert model.r == n_eigenvectors
        assert model.low_rank

        model.fit()

        # check effect sizes tend to be near 1 for first n_marker alternative models
        BlockMatrix.from_numpy(pa).T.write(pa_t_path, force_row_major=True)
        df_lmm = model.fit_alternatives(pa_t_path, a_t_path).to_pandas()

        assert 0.9 < np.mean(df_lmm['beta'][:n_culprits]) < 1.1

        # compare NumPy and Hail LMM per alternative
        df_numpy = model.fit_alternatives_numpy(pa, a).to_pandas()
        assert np.min(df_numpy['chi_sq']) > 0

        na_numpy = df_numpy.isna().any(axis=1)
        na_lmm = df_lmm.isna().any(axis=1)

        assert na_numpy.sum() <= 10
        assert na_lmm.sum() <= 10
        assert np.logical_xor(na_numpy, na_lmm).sum() <= 5

        mask = ~(na_numpy | na_lmm)

        lmm_vs_numpy_p_value = np.sort(np.abs(df_lmm['p_value'][mask] - df_numpy['p_value'][mask]))

        assert lmm_vs_numpy_p_value[10] < 1e-12  # 10 least p-values differences
        assert lmm_vs_numpy_p_value[-1] < 1e-8   # all p-values
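The low-rank path above relies on G Gᵀ and Gᵀ G sharing their nonzero eigenvalues, with eigenvectors mapped through G. A small self-contained numpy sketch of that identity (matrix sizes chosen only for illustration):

import numpy as np

rng = np.random.RandomState(0)
n_samples, n_markers = 50, 10
g = rng.randn(n_samples, n_markers)

k = g @ g.T                          # n_samples x n_samples, rank n_markers
l = g.T @ g                          # n_markers x n_markers, small eigenproblem

sl, v = np.linalg.eigh(l)
p = (g @ (v / np.sqrt(sl))).T        # rows are eigenvectors of k for the nonzero eigenvalues

sk, uk = np.linalg.eigh(k)
assert np.allclose(sk[-n_markers:], sl)                        # shared nonzero eigenvalues
assert np.allclose(np.abs(uk[:, -n_markers:].T), np.abs(p))    # same eigenvectors up to sign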
Example #12
# <codecell>

# initialize the MR dataframe to store the simulated process
MR = pd.DataFrame(0.1, columns=["MR1", "MR2"], index=arange(T))
# initial value of the process
MR.ix[0] = [mu1, mu2]

# <codecell>

# set random seed
from numpy.random import RandomState

prng = RandomState(12345)
# simulate the process
for t in xrange(T - 1):
    dW = prng.multivariate_normal([0, 0], [[1, rho], [rho, 1]])
    next1 = (
        exp(-lam1) * MR["MR1"][t] + (1 - exp(-lam1)) * mu1 + sigma1 * sqrt((1 - exp(-2 * lam1)) / (2 * lam1)) * dW[0]
    )
    next2 = (
        exp(-lam2) * MR["MR2"][t] + (1 - exp(-lam2)) * mu2 + sigma2 * sqrt((1 - exp(-2 * lam2)) / (2 * lam2)) * dW[1]
    )
    MR.ix[t + 1] = [next1, next2]

# <codecell>

MR.index = pd.date_range("1/1/2000", periods=T)  # add an arbitrary date index

# <codecell>

MR.tail()
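DataFrame.ix and xrange no longer exist in current pandas/Python 3, so here is a sketch of the same bivariate Ornstein-Uhlenbeck simulation in modern syntax; the parameter values are placeholders, since mu1, lam1, sigma1, rho, T, etc. are defined elsewhere in the notebook:

import numpy as np
import pandas as pd
from numpy.random import RandomState

# Placeholder parameters (the notebook defines its own values elsewhere)
T, rho = 500, 0.5
lam1, mu1, sigma1 = 0.5, 1.0, 0.2
lam2, mu2, sigma2 = 0.3, 2.0, 0.3

MR = pd.DataFrame(0.1, columns=["MR1", "MR2"], index=np.arange(T))
MR.iloc[0] = [mu1, mu2]

prng = RandomState(12345)
for t in range(T - 1):
    dW = prng.multivariate_normal([0, 0], [[1, rho], [rho, 1]])
    a1, a2 = np.exp(-lam1), np.exp(-lam2)
    s1 = sigma1 * np.sqrt((1 - np.exp(-2 * lam1)) / (2 * lam1))
    s2 = sigma2 * np.sqrt((1 - np.exp(-2 * lam2)) / (2 * lam2))
    MR.iloc[t + 1] = [a1 * MR["MR1"].iloc[t] + (1 - a1) * mu1 + s1 * dW[0],
                      a2 * MR["MR2"].iloc[t] + (1 - a2) * mu2 + s2 * dW[1]]

MR.index = pd.date_range("1/1/2000", periods=T)
print(MR.tail())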
Example #13
    def test_linear_mixed_model_full_rank(self):
        seed = 0
        n_populations = 8
        fst = n_populations * [.9]
        n_samples = 200
        n_variants = 500
        n_orig_markers = 500
        n_culprits = 20
        n_covariates = 3
        sigma_sq = 1
        tau_sq = 1

        from numpy.random import RandomState
        prng = RandomState(seed)

        x = np.hstack((np.ones(shape=(n_samples, 1)),
                       prng.normal(size=(n_samples, n_covariates - 1))))

        mt = hl.balding_nichols_model(n_populations=n_populations,
                                      n_samples=n_samples,
                                      n_variants=n_variants,
                                      fst=fst,
                                      seed=seed)

        pa_t_path = utils.new_temp_file(suffix='bm')

        a = BlockMatrix.from_entry_expr(mt.GT.n_alt_alleles()).T.to_numpy()
        g = a[:, -n_orig_markers:]
        g_std = self._filter_and_standardize_cols(g)

        n_markers = g_std.shape[1]

        k = (g_std @ g_std.T) * n_samples / n_markers

        beta = np.arange(n_covariates)
        beta_stars = np.array([1] * n_culprits)

        y = prng.multivariate_normal(
            np.hstack((a[:, 0:n_culprits], x)) @ np.hstack((beta_stars, beta)),
            sigma_sq * k + tau_sq * np.eye(n_samples))

        s, u = np.linalg.eigh(k)
        p = u.T

        # build and fit model
        py = p @ y
        px = p @ x
        pa = p @ a

        model = LinearMixedModel(py, px, s)
        assert model.n == n_samples
        assert model.f == n_covariates
        assert model.r == n_samples
        assert (not model.low_rank)

        model.fit()

        # check effect sizes tend to be near 1 for first n_marker alternative models
        BlockMatrix.from_numpy(pa).T.write(pa_t_path, force_row_major=True)
        df_lmm = model.fit_alternatives(pa_t_path).to_pandas()

        assert 0.9 < np.mean(df_lmm['beta'][:n_culprits]) < 1.1

        # compare NumPy and Hail LMM per alternative
        df_numpy = model.fit_alternatives_numpy(pa, a).to_pandas()

        na_numpy = df_numpy.isna().any(axis=1)
        na_lmm = df_lmm.isna().any(axis=1)

        assert na_numpy.sum() <= 20
        assert na_lmm.sum() <= 20
        assert np.logical_xor(na_numpy, na_lmm).sum() <= 10

        mask = ~(na_numpy | na_lmm)

        lmm_vs_numpy_p_value = np.sort(
            np.abs(df_lmm['p_value'][mask] - df_numpy['p_value'][mask]))

        assert lmm_vs_numpy_p_value[10] < 1e-12  # 10 least p-values differences
        assert lmm_vs_numpy_p_value[-1] < 1e-8  # all p-values
Example #14
from array import array
import numpy as np
from numpy.random import RandomState
from root_numpy.tmva import add_classification_events, evaluate_reader
from root_numpy import ROOT_VERSION
import matplotlib.pyplot as plt
from ROOT import TMVA, TFile, TCut

plt.style.use('ggplot')
RNG = RandomState(42)

# Construct an example multiclass dataset
n_events = 1000
class_0 = RNG.multivariate_normal(
    [-2, -2], np.diag([1, 1]), n_events)
class_1 = RNG.multivariate_normal(
    [0, 2], np.diag([1, 1]), n_events)
class_2 = RNG.multivariate_normal(
    [2, -2], np.diag([1, 1]), n_events)
X = np.concatenate([class_0, class_1, class_2])
y = np.ones(X.shape[0])
w = RNG.randint(1, 10, n_events * 3)
y[:class_0.shape[0]] *= 0
y[-class_2.shape[0]:] *= 2
permute = RNG.permutation(y.shape[0])
X = X[permute]
y = y[permute]

# Split into training and test datasets
X_train, y_train, w_train = X[:n_events], y[:n_events], w[:n_events]
Example #15
class GPPairwise:
    """
    Gaussian process with a probit likelihood for pairwise comparisons.
    """
    def __init__(self,
                 num_objectives,
                 std_noise=0.01,
                 kernel_width=0.15,
                 prior_mean_type='zero',
                 seed=None):
        """
        :param num_objectives:      number of objectives (input dimension) of the utility function we want to approximate
        :param std_noise:           standard deviation of the normally distributed noise assumed for the utility function
        :param prior_mean_type:     prior mean function type (zero/linear), default is zero
        :param kernel_width:        parameter for the kernel width, default is 0.15
        :param seed:                seed for the random state
        """
        self.num_objectives = num_objectives
        self.std_noise = std_noise
        self.kernel_width = kernel_width
        self.prior_mean_type = prior_mean_type
        self.random_state = RandomState(seed)

        # variables for the observed data
        self.datapoints = None
        self.comparisons = None

        # approximate utility values of the datapoints
        self.utility_vals = None

        # covariance matrix of datapoints
        self.cov_mat = None
        self.cov_mat_inv = None

        # hessian (second deriv) of the pairwise likelihoods for observed data
        self.hess_likelihood = None
        self.hess_likelihood_inv = None

        # needed for predictive distribution (cov - hess_likelihood_inv)^(-1)
        self.pred_cov_factor = None

    def sample(self, sample_points):
        """
        Get a sample from the current GP at the given points.
        :param sample_points:   the points at which we want to take the sample
        :return:                the values of the GP sample at the input points
        """
        # bring sample points in right shape
        sample_points = utils_data.format_data(sample_points,
                                               self.num_objectives)

        # get the mean and the variance of the predictive (multivariate gaussian) distribution at the sample points
        mean, var = self.get_predictive_params(sample_points, pointwise=False)

        # sample from the multivariate gaussian with the given parameters
        f_sample = self.random_state.multivariate_normal(mean, var, 1)[0]

        return f_sample

    def get_predictive_params(self, x_new, pointwise):
        """
        Returns the predictive parameters (mean, variance) of the Gaussian distribution
        at the given datapoints
        :param x_new:    the points for which we want the predictive params
        :param pointwise:       whether we want pointwise variance or the entire covariance matrix
        :return:
        """
        # bring input points into right shape
        x_new = utils_data.format_data(x_new, self.num_objectives)

        # if we don't have any data yet, use prior GP to make predictions
        if self.datapoints is None or self.utility_vals is None:
            pred_mean, pred_var = self._evaluate_prior(x_new)

        # otherwise compute predictive mean and covariance
        else:
            cov_xnew_x = self._cov_mat(x_new, self.datapoints, noise=False)
            cov_x_xnew = self._cov_mat(self.datapoints, x_new, noise=False)
            cov_xnew = self._cov_mat(x_new, noise=False)
            pred_mean = self.prior_mean(x_new) + np.dot(
                np.dot(cov_xnew_x, self.cov_mat_inv),
                (self.utility_vals - self.prior_mean(self.datapoints)))
            pred_var = cov_xnew - np.dot(
                np.dot(cov_xnew_x, self.pred_cov_factor), cov_x_xnew)

        if pointwise:
            pred_var = pred_var.diagonal()

        return pred_mean, pred_var

    def update(self, dataset):
        """
        Update the Gaussian process using the given data
        :param dataset:     dataset with observed datapoints and comparisons
        :return:
        """
        self.datapoints = dataset.datapoints
        self.comparisons = dataset.comparisons

        # compute the covariance matrix given the new datapoints
        self.cov_mat = self._cov_mat(self.datapoints)
        self.cov_mat_inv = np.linalg.inv(self.cov_mat)

        # compute the map estimate of f
        self.utility_vals = self._compute_posterior()

        # compute the hessian of the likelihood given f_MAP
        self.hess_likelihood = self._compute_hess_likelihood()
        try:
            self.hess_likelihood_inv = np.linalg.inv(self.hess_likelihood)
        except np.linalg.LinAlgError:
            # fall back to the pseudo-inverse if the Hessian is singular
            self.hess_likelihood_inv = np.linalg.pinv(self.hess_likelihood)

        self.pred_cov_factor = np.linalg.inv(self.cov_mat -
                                             self.hess_likelihood_inv)

    def _evaluate_prior(self, input_points):
        """
        Given some datapoints, evaluate the prior
        :param input_points:    input datapoints at which to evaluate prior
        :return:                predictive mean and covariance at the given inputs
        """
        pred_mean = self.prior_mean(input_points)
        num_inputs = input_points.shape[0]
        pred_cov = self._kernel(np.repeat(input_points, num_inputs, axis=0),
                                np.tile(input_points,
                                        (num_inputs, 1))).reshape(
                                            (num_inputs, num_inputs))
        return pred_mean, pred_cov

    def _cov_mat(self, x1, x2=None, noise=True):
        """
        Covariance matrix for preference data using the kernel function.
        :param x1:      datapoints for which to compute covariance matrix
        :param x2:      if None, covariance matrix will be square for the input x1
                        if not None, covariance will be between x1 (rows) and x2 (cols)
        :param noise:   whether to add noise to the diagonal of the covariance matrix
        :return:
        """
        if x2 is None:
            x2 = x1
        else:  # if x1 != x2 we don't add noise!
            noise = False

        x1 = utils_data.format_data(x1, self.num_objectives)
        x2 = utils_data.format_data(x2, self.num_objectives)

        cov_mat = self._kernel(np.repeat(x1, x2.shape[0], axis=0),
                               np.tile(x2, (x1.shape[0], 1)))
        cov_mat = cov_mat.reshape((x1.shape[0], x2.shape[0]))

        if noise:
            cov_mat += self.std_noise**2 * np.eye(cov_mat.shape[0])

        return cov_mat

    def prior_mean(self, x):
        """
        Prior mean function
        :param x:   num_datapoints * num_objectives
        :return:
        """
        x = utils_data.format_data(x, self.num_objectives)
        m = np.zeros(x.shape[0])
        if self.prior_mean_type == 'linear':
            m += np.sum(x, axis=1) / self.num_objectives
        elif self.prior_mean_type != 'zero':
            raise TypeError('Prior mean type not understood.')
        return m

    def _kernel(self, x1, x2):
        x1 = utils_data.format_data(x1, self.num_objectives)
        x2 = utils_data.format_data(x2, self.num_objectives)
        k = 0.8**2 * np.exp(-(1. / (2. * (self.kernel_width**2))) *
                            np.linalg.norm(x1 - x2, axis=1)**2)
        return k

    def _compute_hess_likelihood(self, z=None):
        """
        Compute the hessian of the likelihood given utility values f
        :return:
        """
        if z is None:
            # compute z
            f_winner = np.array([
                self.utility_vals[self.comparisons[i, 0]]
                for i in range(self.comparisons.shape[0])
            ])
            f_loser = np.array([
                self.utility_vals[self.comparisons[i, 1]]
                for i in range(self.comparisons.shape[0])
            ])
            z = (f_winner - f_loser) / (np.sqrt(2) * self.std_noise)

        z_logpdf = norm.logpdf(z)
        z_logcdf = norm.logcdf(z)

        # initialise with zeros
        lambda_mat = np.zeros(
            (self.datapoints.shape[0], self.datapoints.shape[0]))

        # build up diagonal for pairs (xi, xi)
        diag_arr = np.array([
            self._compute_hess_likelihood_entry(m, m, z, z_logpdf, z_logcdf)
            for m in range(self.datapoints.shape[0])
        ])
        np.fill_diagonal(lambda_mat, diag_arr)  # happens in-place

        # go through the list of comparisons collected so far and update lambda
        for k in range(self.comparisons.shape[0]):
            m = self.comparisons[k, 0]  # winner
            n = self.comparisons[k, 1]  # loser
            lambda_mat[m, n] = self._compute_hess_likelihood_entry(
                m, n, z, z_logpdf, z_logcdf)
            lambda_mat[n, m] = self._compute_hess_likelihood_entry(
                n, m, z, z_logpdf, z_logcdf)

        # add jitter term to make lambda positive definite for computational stability
        lambda_mat += np.eye(self.datapoints.shape[0]) * 0.01

        return lambda_mat

    def _compute_hess_likelihood_entry(self, m, n, z, z_logpdf, z_logcdf):
        """
        Get a single entry for the Hessian matrix at indices (m,n)
        :param m:
        :param n:
        :param f:
        :return:
        """
        h_x_m = np.array(self.comparisons[:, 0] == m, dtype=int) - np.array(
            self.comparisons[:, 1] == m, dtype=int)
        h_x_n = np.array(self.comparisons[:, 0] == n, dtype=int) - np.array(
            self.comparisons[:, 1] == n, dtype=int)
        p = h_x_m * h_x_n * (np.exp(2. * z_logpdf - 2. * z_logcdf) +
                             z * np.exp(z_logpdf - z_logcdf))
        c = -np.sum(p) / (2 * self.std_noise**2)
        return c

    def _compute_posterior(self):
        """ Approximate the posterior distribution """

        converged = False
        try_no = 0

        f_map = None

        # using Newton-Raphson, approximate f_MAP
        while not converged and try_no < 1:

            # randomly initialise f_map
            f_map = self.random_state.uniform(0., 1., self.datapoints.shape[0])
            # f_map = np.zeros(gp.datapoints.shape[0])

            for m in range(100):

                # compute z
                f_winner = np.array([
                    f_map[self.comparisons[i, 0]]
                    for i in range(self.comparisons.shape[0])
                ])
                f_loser = np.array([
                    f_map[self.comparisons[i, 1]]
                    for i in range(self.comparisons.shape[0])
                ])
                z = (f_winner - f_loser) / (np.sqrt(2) * self.std_noise)
                z_logpdf = norm.logpdf(z)
                z_logcdf = norm.logcdf(z)

                # compute b
                h_j = np.array([
                    np.array(self.comparisons[:, 0] == j, dtype=int) -
                    np.array(self.comparisons[:, 1] == j, dtype=int)
                    for j in range(self.datapoints.shape[0])
                ])
                b = np.sum(h_j * np.exp(z_logpdf - z_logcdf),
                           axis=1) / (np.sqrt(2) * self.std_noise)

                # compute gradient g
                g = -np.dot(self.cov_mat_inv,
                            (f_map - self.prior_mean(self.datapoints))) + b

                # compute approximation of the hessian of the posterior
                hess_likelihood = self._compute_hess_likelihood(z)
                hess_posterior = -self.cov_mat_inv + hess_likelihood
                try:
                    hess_posterior_inv = np.linalg.inv(hess_posterior)
                except np.linalg.LinAlgError:
                    # fall back to the pseudo-inverse if the Hessian is singular
                    hess_posterior_inv = np.linalg.pinv(hess_posterior)

                # perform update
                update = np.dot(hess_posterior_inv, g)
                f_map -= update

                # stop criterion
                if np.linalg.norm(update) < 0.0001:
                    converged = True
                    break

            if not converged:
                print("Did not converge.")
                try_no += 1

        return f_map
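Before any comparisons are observed, GPPairwise.sample falls back to the prior (_evaluate_prior) and then draws from a multivariate normal. A self-contained sketch of that step, evaluating the same squared-exponential kernel on a 1-D grid (grid and jitter are chosen only for illustration):

import numpy as np
from numpy.random import RandomState

kernel_width = 0.15
x = np.linspace(0.0, 1.0, 50)[:, None]                            # 50 one-dimensional input points
sq_dists = (x - x.T) ** 2                                         # pairwise squared distances
cov = 0.8 ** 2 * np.exp(-sq_dists / (2.0 * kernel_width ** 2))    # same form as _kernel above
cov += 1e-8 * np.eye(len(x))                                      # jitter for numerical stability
rng = RandomState(0)
f_sample = rng.multivariate_normal(np.zeros(len(x)), cov)
print(f_sample[:5])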
Example #16
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import roc_curve

from sklearn import tree
import cPickle

import numpy as np
from numpy.random import RandomState

RNG = RandomState(45)

# Construct an example dataset for binary classification
n_vars = 2
n_events = 300
signal = RNG.multivariate_normal(
    np.ones(n_vars), np.diag(np.ones(n_vars)), n_events)
background = RNG.multivariate_normal(
    np.ones(n_vars) * -1, np.diag(np.ones(n_vars)), n_events)
X = np.concatenate([signal, background])
y = np.ones(X.shape[0])
w = RNG.randint(1, 10, n_events * 2)
y[signal.shape[0]:] *= -1
permute = RNG.permutation(y.shape[0])
X = X[permute]
y = y[permute]

# Some print-out
print "Event numbers total:", 2 * n_events

# Plot the testing points
c1 = ROOT.TCanvas("c1","Testing Dataset",200,10,700,500)
Example #17
import bpy
import numpy as np
from numpy.random import RandomState

bpy.ops.wm.open_mainfile(filepath="/Users/bacheletlab/Downloads/stereo_equirectangular_base.blend")


count = 200
rs = RandomState(1234)

min_pt = 0
max_pt = 10
cube_size = 0.1

# Generate some data from a multivariate normal distribution
# (http://stackoverflow.com/questions/16024677/generate-correlated-data-in-python-3-3)
mu = np.array([0.0, 5.0, 5.0])
r = np.array([[5.5, 0.3, 4.3],
              [0.4,  1., 0.5],
              [4.8, 0.5, 5.0]])
data = rs.multivariate_normal(mu, r, size=count)
print(data)

for x, y, z in data:
    print("xyz", x, y, z)
    bpy.ops.mesh.primitive_cube_add(location=(x, y, z))
    bpy.ops.transform.resize(value=(cube_size, cube_size, cube_size))

bpy.data.scenes['Scene'].render.filepath = '/Users/bacheletlab/Downloads/output.png'
bpy.ops.render.render(write_still=True)
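Note that the covariance matrix r above is not exactly symmetric (0.3 vs 0.4, 4.3 vs 4.8), so numpy may warn when sampling. A small sketch, independent of Blender, that symmetrizes r and checks the sample covariance against it (array values copied from the example):

import numpy as np
from numpy.random import RandomState

mu = np.array([0.0, 5.0, 5.0])
r = np.array([[5.5, 0.3, 4.3],
              [0.4, 1.0, 0.5],
              [4.8, 0.5, 5.0]])
r_sym = (r + r.T) / 2.0                       # enforce symmetry before sampling
rs = RandomState(1234)
data = rs.multivariate_normal(mu, r_sym, size=100000)
print(np.cov(data, rowvar=False).round(2))    # should be close to r_sym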
Example #18
class GP_pairwise:
    """Gaussian process with a discrete-choice probit model (latent utility function).
    
    Attributes:
        sigma: Hyperparameter for the std of the normally distributed noise for the utility function.
        theta: Hyperparameter for kernel width.
        seed: Seed for random state.
        datapoints: Matrix for the observed data.
        comparisons: Matrix for comparisons of the observed data.
        f_map: Approximate utility values of the datapoints.
        K: Covariance matrix of datapoints.
        K_inv: Inverse of the covariance matrix of datapoints.
        C: The matrix C for observed data.
        C_inv: The inverse of matrix C for observed data.
    """
    def __init__(self, sigma=0.01, theta=50, seed=None):
        """Inits GP class with all attributes."""
        self.sigma = sigma
        self.theta = theta
        self.random_state = RandomState(seed)
        self.datapoints = None
        self.comparisons = None
        self.f_map = None
        self.K = None
        self.K_inv = None
        self.C = None
        self.C_inv = None

    def update(self, dataset):
        """Update the Gaussian process using the given data.
        
        Args:
            dataset: Dataset consisting of datapoints and a comparison matrix.
        """
        self.datapoints = dataset.datapoints
        self.comparisons = dataset.comparisons

        # compute the covariance matrix and its inverse
        self.K = self._get_K(self.datapoints)
        self.K_inv = self._get_inv(self.K)

        # compute the MAP estimate of f
        self.f_map = self._get_f_map()

        # compute C matrix given f_MAP and its inverse (pseudo-inverse)
        self.C = self._get_C()
        self.C_inv = self._get_inv(self.C)

        return True

    def sample(self, sample_points):
        """Get a sample from the current GP at the given points.
        
        Args:
            sample_points: The points at which we want to take the sample.
            
        Returns: 
            The values of the GP sample at the input points.
        """
        # get the mean and the variance of the predictive (multivariate gaussian) distribution at the sample points
        mean, var = self.get_Gaussian_params(sample_points, pointwise=False)

        # sample from the multivariate gaussian with the given parameters
        f_sample = self.random_state.multivariate_normal(mean, var, 1)[0]

        return f_sample

    def predict(self, sample_point):
        """Predicts the value function (predictive mean) for a single datapoint."""
        mean, _ = self.get_Gaussian_params(np.array([sample_point]),
                                           pointwise=False)
        return mean

    def get_Gaussian_params(self, x_new, pointwise):
        """Gets the Gaussian parameters.
        
        Args:
            x_new: the points for which we want the predictive parameters.
            pointwise: whether we want pointwise variance or the entire covariance matrix.
            
        Returns:
            The predictive parameters of the Gaussian distribution at the given datapoints.
        """
        # if we don't have any data yet, use prior GP to make predictions
        if self.datapoints is None or self.f_map is None:
            pred_mean, pred_var = self._evaluate_prior(x_new)

        # otherwise compute predictive mean and covariance
        else:
            k_T = self._get_K(x_new, self.datapoints, noise=False)
            k = self._get_K(self.datapoints, x_new, noise=False)
            k_plus = self._get_K(x_new, noise=False)
            pred_mean = self._prior_mean(k_plus) + np.dot(
                np.dot(k_T, self.K_inv),
                (self.f_map - self._prior_mean(self.datapoints)))
            pred_var = k_plus - np.dot(
                np.dot(k_T, self._get_inv(self.K + self.C_inv)), k)
        if pointwise:
            pred_var = pred_var.diagonal()

        return pred_mean, pred_var

    def _get_K(self, x1, x2=None, noise=True):
        """Computes covariance matrix for preference data using the kernel function.
        
        Args:
            x1: The datapoints for which to compute covariance matrix.
            x2: If None, covariance matrix will be square for the input x1,
                If not None, covariance will be between x1 (rows) and x2 (cols)
            noise: Whether to add noise to the diagonal of the covariance matrix.
            
        Returns:
            The covariance matrix K.            
        """
        if x2 is None:
            x2 = x1
        else:
            noise = False
        K = self._k(np.repeat(x1, x2.shape[0], axis=0),
                    np.tile(x2, (x1.shape[0], 1)))
        K = K.reshape((x1.shape[0], x2.shape[0]))
        if noise:
            K += self.sigma**2 * np.eye(K.shape[0])
        return K

    def _k(self, x1, x2):
        """Exponentiated quadratic kernel function"""
        k = 0.8**2 * np.exp(-(1. / (2. * (self.theta**2))) *
                            np.linalg.norm(x1 - x2, axis=1)**2)
        return k

    def _get_f_map(self):
        """Computes maximum a posterior (MAP) evaluation of f given the data using Newton's method
        
        Returns: 
            MAP of the Gassian processes values at current datapoints
        """
        converged = False
        try_no = 0

        f_map = None

        # Newton's method to approximate f_MAP
        while not converged and try_no < 1:

            # randomly initialise f_map
            f_map = self.random_state.uniform(0., 1., self.datapoints.shape[0])

            for m in range(100):
                # compute Z
                f_sup = np.array([
                    f_map[self.comparisons[i, 0]]
                    for i in range(self.comparisons.shape[0])
                ])
                f_inf = np.array([
                    f_map[self.comparisons[i, 1]]
                    for i in range(self.comparisons.shape[0])
                ])
                Z = self._get_Z(f_sup, f_inf)
                Z_logpdf = norm.logpdf(Z)
                Z_logcdf = norm.logcdf(Z)

                # compute b
                b = self._get_b(Z_logpdf, Z_logcdf)

                # compute gradient g
                g = self._get_g(f_map, b)

                # compute hessian H
                C = self._get_C(Z)
                H = -self.K_inv + C
                H_inv = self._get_inv(H)

                # perform update
                update = np.dot(H_inv, g)
                f_map -= update

                # stop criterion
                if np.linalg.norm(update) < 0.0001:
                    converged = True
                    break

            if not converged:
                print("Did not converge.")
                try_no += 1

        return f_map

    def _get_Z(self, f_sup, f_inf):
        """Gets the random variable Z based on given sup and inf pair."""
        return (f_sup - f_inf) / (np.sqrt(2) * self.sigma)

    def _get_b(self, Z_logpdf, Z_logcdf):
        """Gets the N-dimensional vector b"""
        h_j = np.array([
            np.array(self.comparisons[:, 0] == j, dtype=int) -
            np.array(self.comparisons[:, 1] == j, dtype=int)
            for j in range(self.datapoints.shape[0])
        ])

        b = np.sum(h_j * np.exp(Z_logpdf - Z_logcdf),
                   axis=1) / (np.sqrt(2) * self.sigma)
        return b

    def _get_g(self, f_map, b):
        """Gets the gradient g"""
        return -np.dot(self.K_inv,
                       (f_map - self._prior_mean(self.datapoints))) + b

    def _get_C(self, Z=None):
        """Gets the matrix C"""
        if Z is None:
            # compute z
            f_sup = np.array([
                self.f_map[self.comparisons[i, 0]]
                for i in range(self.comparisons.shape[0])
            ])
            f_inf = np.array([
                self.f_map[self.comparisons[i, 1]]
                for i in range(self.comparisons.shape[0])
            ])
            Z = (f_sup - f_inf) / (np.sqrt(2) * self.sigma)

        Z_logpdf = norm.logpdf(Z)
        Z_logcdf = norm.logcdf(Z)

        # init with zeros
        C = np.zeros((self.datapoints.shape[0], self.datapoints.shape[0]))

        # build up diagonal for pairs (x, x)
        diag_arr = np.array([
            self._get_C_entry(m, m, Z, Z_logpdf, Z_logcdf)
            for m in range(self.datapoints.shape[0])
        ])
        np.fill_diagonal(C, diag_arr)  # happens in-place

        # go through the existing list of comparisons and update C
        for k in range(self.comparisons.shape[0]):
            m = self.comparisons[k, 0]  # superior
            n = self.comparisons[k, 1]  # inferior
            C[m, n] = self._get_C_entry(m, n, Z, Z_logpdf, Z_logcdf)
            C[n, m] = self._get_C_entry(n, m, Z, Z_logpdf, Z_logcdf)

        # add jitter terms to make matrix C positive semidefinite for stable computation
        C += np.eye(self.datapoints.shape[0]) * 0.01

        return C

    def _get_C_entry(self, m, n, Z, Z_logpdf, Z_logcdf):
        """Gets a single entry for the Hessian matrix at indices (m,n)"""
        h_x_m = np.array(self.comparisons[:, 0] == m, dtype=int) - np.array(
            self.comparisons[:, 1] == m, dtype=int)
        h_x_n = np.array(self.comparisons[:, 0] == n, dtype=int) - np.array(
            self.comparisons[:, 1] == n, dtype=int)
        p = h_x_m * h_x_n * (np.exp(2. * Z_logpdf - 2. * Z_logcdf) +
                             Z * np.exp(Z_logpdf - Z_logcdf))
        c = -np.sum(p) / (2 * self.sigma**2)
        return c

    def _evaluate_prior(self, input_points):
        """Evaluates the prior distribution given some datapoints.
        
        Args:
            input_points: input datapoints at which to evaluate prior distribution.
            
        Returns:
            The predictive mean and covariance at the given inputs.
        """
        pred_mean = self._prior_mean(input_points)
        num_inputs = input_points.shape[0]
        pred_cov = self._k(np.repeat(input_points, num_inputs, axis=0),
                           np.tile(input_points, (num_inputs, 1))).reshape(
                               (num_inputs, num_inputs))
        return pred_mean, pred_cov

    def _get_inv(self, M):
        """Computes the inverse of the given matrix, or the pseudo-inverse if it is singular."""
        try:
            M_inv = np.linalg.inv(M)
        except np.linalg.LinAlgError:
            M_inv = np.linalg.pinv(M)
        return M_inv

    def _prior_mean(self, x):
        """Returns the prior mean of zeros"""
        m = np.zeros(x.shape[0])
        return m
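As a small illustration of the probit comparison likelihood behind _get_Z and _get_C, a sketch computing the probability that item a beats item b from two latent utility values under the assumed noise level sigma (the utility values are made up):

import numpy as np
from scipy.stats import norm

sigma = 0.01                      # noise std, as in the class default
f_a, f_b = 0.73, 0.70             # illustrative latent utility values
z = (f_a - f_b) / (np.sqrt(2) * sigma)
print(norm.cdf(z))                # P(a preferred over b) under the probit model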