Example #1
def test_sum_kernel_grad():
    npr.seed(1)

    eps = 1e-5
    N = 10
    M = 5
    D = 3

    kernel1 = Matern52(D)
    kernel2 = Matern52(D)
    kernel3 = Matern52(D)
    kernel = SumKernel(kernel1, kernel2, kernel3)

    data1 = npr.randn(N, D)
    data2 = npr.randn(M, D)

    loss = np.sum(kernel.cross_cov(data1, data2))
    dloss = kernel.cross_cov_grad_data(data1, data2).sum(0)

    dloss_est = np.zeros(dloss.shape)
    for i in xrange(M):
        for j in xrange(D):
            data2[i, j] += eps
            loss_1 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] -= 2 * eps
            loss_2 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] += eps
            dloss_est[i, j] = ((loss_1 - loss_2) / (2 * eps))

    assert np.linalg.norm(dloss - dloss_est) < 1e-6
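
The same central-difference check is repeated in almost every test below. A generic helper along these lines (a sketch, not part of the library) captures the pattern: perturb one entry, take the symmetric difference quotient, and restore the entry.

import numpy as np

def finite_diff_grad(loss_fn, x, eps=1e-5):
    """Central-difference estimate of d loss_fn(x) / dx, one entry at a time."""
    grad = np.zeros_like(x)
    for idx in np.ndindex(*x.shape):
        x[idx] += eps
        loss_plus = loss_fn(x)
        x[idx] -= 2 * eps
        loss_minus = loss_fn(x)
        x[idx] += eps                                   # restore the original entry
        grad[idx] = (loss_plus - loss_minus) / (2 * eps)
    return grad

# e.g. dloss_est = finite_diff_grad(lambda d2: np.sum(kernel.cross_cov(data1, d2)), data2)
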
def test_backward_pass():
    npr.seed(1)

    eps = 1e-5
    N   = 15
    D   = 10

    data = 0.5*npr.rand(N,D)

    norm      = Normalization(3)
    norm_inds = [1,3,5]

    bw      = BetaWarp(2)
    bw_inds = [0,2]

    lin      = Linear(3)
    lin_inds = [6,8,9]

    t = Transformer(D)

    # Add a layer and test the gradient
    t.add_layer((norm, norm_inds), (bw, bw_inds), (lin, lin_inds))
    new_data = t.forward_pass(data)
    loss     = np.sum(new_data**2)
    V        = 2*new_data

    dloss = t.backward_pass(V)
    
    dloss_est = np.zeros(dloss.shape)
    for i in xrange(N):
        for j in xrange(D):
            data[i,j] += eps
            loss_1 = np.sum(t.forward_pass(data)**2)
            data[i,j] -= 2*eps
            loss_2 = np.sum(t.forward_pass(data)**2)
            data[i,j] += eps
            dloss_est[i,j] = ((loss_1 - loss_2) / (2*eps))

    assert np.linalg.norm(dloss - dloss_est) < 1e-6

    # Add a second layer and test the gradient
    t.add_layer(Linear(9))

    new_data = t.forward_pass(data)
    loss     = np.sum(new_data**2)
    V        = 2*new_data

    dloss = t.backward_pass(V)
    
    dloss_est = np.zeros(dloss.shape)
    for i in xrange(N):
        for j in xrange(D):
            data[i,j] += eps
            loss_1 = np.sum(t.forward_pass(data)**2)
            data[i,j] -= 2*eps
            loss_2 = np.sum(t.forward_pass(data)**2)
            data[i,j] += eps
            dloss_est[i,j] = ((loss_1 - loss_2) / (2*eps))

    assert np.linalg.norm(dloss - dloss_est) < 1e-6
Example #3
    def kernel(self, x1, x2=None, grad=False):
        if x2 is None:
            x2 = x1
        cov = np.ones((x1.shape[0], x2.shape[0]))

        if grad:
            Ks = list()
            dKs = list()
            # One gradient entry per input dimension: shape (N, M, D)
            cov_grad = np.zeros((x1.shape[0], x2.shape[0], x1.shape[1]))
            for i in xrange(len(self.kernels)):
                (K, dK) = self.kernels[i].kernel(x1[:, self.dim_indices[i]],
                                                 x2[:, self.dim_indices[i]],
                                                 grad)
                Ks.append(K)
                dKs.append(dK)
                cov = cov * K

            # Product rule: the term for kernel i is dK_i * prod_{j != i} K_j,
            # i.e. dK_i * (cov / K_i).
            for i in xrange(len(self.kernels)):
                cov_grad[:, :, self.dim_indices[i]] = (
                    cov_grad[:, :, self.dim_indices[i]] +
                    dKs[i] * (cov / Ks[i])[:, :, np.newaxis])
            return (cov, cov_grad)
        else:
            for i in xrange(len(self.kernels)):
                cov = cov * self.kernels[i].kernel(x1[:, self.dim_indices[i]],
                                                   x2[:, self.dim_indices[i]],
                                                   grad)
        return cov
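
The gradient branch relies on the product rule: with cov = prod_i K_i, the contribution of kernel i is dK_i times the product of the remaining factors, which is exactly dK_i * (cov / K_i). A self-contained sketch of that identity with made-up shapes (illustrative only, not from the source):

import numpy as np

np.random.seed(0)
K1  = np.random.rand(4, 3) + 0.1     # stand-in covariance of kernel 1, shape (N, M)
K2  = np.random.rand(4, 3) + 0.1     # stand-in covariance of kernel 2, shape (N, M)
dK1 = np.random.rand(4, 3, 2)        # stand-in gradient of kernel 1, shape (N, M, d)

cov = K1 * K2
# dK1 * (cov / K1) is the product-rule term for kernel 1's dimensions and equals dK1 * K2
assert np.allclose(dK1 * (cov / K1)[:, :, np.newaxis],
                   dK1 * K2[:, :, np.newaxis])
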
Example #4
def test_backward_pass():
    npr.seed(1)

    eps = 1e-5
    N = 10
    D = 5

    nl = NormLin(D)

    data = 0.5 * npr.rand(N, D)
    new_data = nl.forward_pass(data)
    loss = np.sum(new_data**2)
    V = 2 * new_data

    dloss = nl.backward_pass(V)

    dloss_est = np.zeros(dloss.shape)
    for i in xrange(N):
        for j in xrange(D):
            data[i, j] += eps
            loss_1 = np.sum(nl.forward_pass(data)**2)
            data[i, j] -= 2 * eps
            loss_2 = np.sum(nl.forward_pass(data)**2)
            data[i, j] += eps
            dloss_est[i, j] = ((loss_1 - loss_2) / (2 * eps))

    assert np.linalg.norm(dloss - dloss_est) < 1e-6
Example #5
def test_grad():
    npr.seed(1)

    eps = 1e-5
    N = 10
    M = 5
    D = 5
    inds = [0, 2, 4]

    kernel = Subset(D, Matern52(len(inds)), inds)

    data1 = npr.randn(N, D)
    data2 = npr.randn(M, D)

    loss = np.sum(kernel.cross_cov(data1, data2))
    dloss = kernel.cross_cov_grad_data(data1, data2).sum(0)

    dloss_est = np.zeros(dloss.shape)
    for i in xrange(M):
        for j in xrange(D):
            data2[i, j] += eps
            loss_1 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] -= 2 * eps
            loss_2 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] += eps
            dloss_est[i, j] = ((loss_1 - loss_2) / (2 * eps))

    print('Subset kernel grad using indices %s:' % inds)
    print(dloss)

    assert np.linalg.norm(dloss - dloss_est) < 1e-6
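
Because the Subset kernel only evaluates the columns listed in inds, a natural extra check (not in the source test; it reuses dloss, D and inds from test_grad above) is that the gradient vanishes in the unused columns:

unused = [d for d in range(D) if d not in inds]
assert np.allclose(dloss[:, unused], 0.0)
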
def test_predict():
    npr.seed(1)

    N     = 10
    Npend = 3
    Ntest = 2
    D     = 5

    gp   = GPClassifier(D, burnin=5, num_fantasies=7)
    pred = npr.rand(Ntest,D)

    # Test with 0 points
    mu, v = gp.predict(pred)
    np.testing.assert_allclose(mu, 0, rtol=1e-7, atol=0, err_msg='', verbose=True)
    np.testing.assert_allclose(v, 1+1e-6, rtol=1e-7, atol=0, err_msg='', verbose=True)

    #Test with 1 point
    X   = np.zeros((1,D))
    W   = npr.randn(D,1)
    val = X.dot(W).flatten() > 0

    gp.fit(X, val, fit_hypers=False)

    mu, v = gp.predict(pred)
    
    # Points closer to the origin will have less variance and a larger mean   
    mu, v = gp.predict(np.tile(np.linspace(0,1,100)[:,None],(1,D)))
    assert np.all(np.diff(mu) > 0) and np.all(np.diff(v) > 0)

    # Now let's make sure it doesn't break with more data and pending
    inputs  = 0.5*npr.rand(N,D)
    vals    = inputs.dot(W).flatten() > 0
    pending = npr.rand(Npend,D)

    gp.fit(inputs, vals, pending)

    mu, v = gp.predict(pred)

    # Now let's check the gradients
    eps = 1e-5

    mu, v, dmu, dv = gp.predict(pred, compute_grad=True)

    # The implied loss is np.sum(mu**2) + np.sum(v**2)
    dloss = 2*(dmu*mu[:,np.newaxis,:]).sum(2) + 2*(v[:,np.newaxis,np.newaxis]*dv).sum(2)

    dloss_est = np.zeros(dloss.shape)
    for i in xrange(Ntest):
        for j in xrange(D):
            pred[i,j] += eps
            mu, v = gp.predict(pred)
            loss_1 = np.sum(mu**2) + np.sum(v**2)
            pred[i,j] -= 2*eps
            mu, v = gp.predict(pred)
            loss_2 = np.sum(mu**2) + np.sum(v**2)
            pred[i,j] += eps
            dloss_est[i,j] = ((loss_1 - loss_2) / (2*eps))

    assert np.linalg.norm(dloss - dloss_est) < 1e-5
Example #7
def grad_dist2(ls, x1, x2=None):
    if x2 is None:
        x2 = x1
        
    # Rescale.
    x1 = x1 / ls
    x2 = x2 / ls
    
    N = x1.shape[0]
    M = x2.shape[0]
    D = x1.shape[1]
    gX = np.zeros((x1.shape[0],x2.shape[0],x1.shape[1]))

    code = \
    """
    for (int i=0; i<N; i++)
      for (int j=0; j<M; j++)
        for (int d=0; d<D; d++)
          gX(i,j,d) = (2/ls(d))*(x1(i,d) - x2(j,d));
    """
    try:
        scipy.weave.inline(code, ['x1','x2','gX','ls','M','N','D'], \
                           type_converters=scipy.weave.converters.blitz, \
                           compiler='gcc')
    except:
        # The C code weave above is 10x faster than this:
        for i in xrange(0,x1.shape[0]):
            gX[i,:,:] = 2*(x1[i,:] - x2[:,:])*(1/ls)

    return gX
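
When scipy.weave is unavailable, the bare except falls back to the row-by-row NumPy loop. A fully broadcast version (a sketch, not from the source) removes that loop too and returns the same (N, M, D) array:

import numpy as np

def grad_dist2_numpy(ls, x1, x2=None):
    if x2 is None:
        x2 = x1
    x1 = x1 / ls
    x2 = x2 / ls
    # (N, 1, D) - (1, M, D) broadcasts to (N, M, D); rescale each dimension by 1/ls
    return 2 * (x1[:, np.newaxis, :] - x2[np.newaxis, :, :]) / ls
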
Example #8
    def paramify_and_print(self,
                           data_vector,
                           left_indent=0,
                           indent_top_row=False):
        params = self.paramify(data_vector)
        indentation = ' ' * left_indent

        if indent_top_row:
            sys.stderr.write(indentation)
        sys.stderr.write('NAME          TYPE       VALUE\n')
        sys.stderr.write(indentation)
        sys.stderr.write('----          ----       -----\n')

        for param_name, param in items(params):

            if param['type'] == 'float':
                format_str = '%s%-12.12s  %-9.9s  %-12f\n'
            elif param['type'] == 'enum':
                format_str = '%s%-12.12s  %-9.9s  %-12s\n'
            else:
                format_str = '%s%-12.12s  %-9.9s  %-12d\n'

            for i in xrange(len(param['values'])):
                if i == 0:
                    sys.stderr.write(format_str %
                                     (indentation, param_name, param['type'],
                                      param['values'][i]))
                else:
                    sys.stderr.write(format_str %
                                     (indentation, '', '',
                                      param['values'][i]))
Example #9
    def variables_config_to_meta(self, variables_config):
        """
        Converts a dict of variable meta-information from a config-file format into
        a format that can be more easily used by bayesopt routines.
        """
        # Stores the metadata for the dataset that allows a conversion
        # from a config file representation into a matrix representation.
        # The main addition that this variable adds is a mapping between
        # each variable and associated column indices in the matrix
        # representation.
        variables_meta = OrderedDict()
        cardinality = 0  # The number of distinct variables
        num_dims = 0  # The number of dimensions in the matrix representation

        for name, variable in items(variables_config):
            cardinality += variable['size']
            vdict = {
                'type': variable['type'].lower(),
                'indices': []
            }  # indices stores a mapping from these variable(s) to their matrix column(s)

            if vdict['type'] == 'int':
                vdict['min'] = int(variable['min'])
                vdict['max'] = int(variable['max'])
            elif vdict['type'] == 'float':
                vdict['min'] = float(variable['min'])
                vdict['max'] = float(variable['max'])
            elif vdict['type'] == 'enum':
                vdict['options'] = list(variable['options'])
            else:
                raise Exception("Unknown variable type.")

            for i in xrange(variable['size']):
                if vdict['type'] == 'int':
                    vdict['indices'].append(num_dims)
                    num_dims += 1
                elif vdict['type'] == 'float':
                    vdict['indices'].append(num_dims)
                    num_dims += 1
                elif vdict['type'] == 'enum':
                    vdict['indices'].append(
                        list(
                            np.arange(len(list(variable['options']))) +
                            num_dims))
                    num_dims += len(list(variable['options']))
                else:
                    raise Exception("Unknown variable type.")

            variables_meta[name] = vdict

        return variables_meta, num_dims, cardinality
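
Integer and float variables map to one matrix column per element of 'size', while an enum with k options expands into k one-hot columns. A tiny stand-alone sketch of the enum expansion (assumed values, not from the source):

import numpy as np

options = ["one", "two", "three"]
num_dims = 5                                              # columns allocated so far
enum_indices = list(np.arange(len(options)) + num_dims)   # -> [5, 6, 7]
num_dims += len(options)                                  # -> 8
assert enum_indices == [5, 6, 7] and num_dims == 8
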
Example #10
def test_grad():
    npr.seed(1)

    eps = 1e-5
    N = 10
    M = 5
    D = 5

    beta_warp = BetaWarp(2)
    norm = Normalization(2)
    lin = Linear(D)
    transformer = Transformer(D)
    # Each entry is a tuple, (transformation, indices_it_acts_on)
    transformer.add_layer(
        (beta_warp, [0, 2]),
        (norm, [1, 4]))  # This is crazy. We would never do this.
    # One transformation means apply to all dimensions.
    transformer.add_layer(lin)

    kernel = TransformKernel(Matern52(lin.num_factors), transformer)

    data1 = npr.rand(N, D)
    data2 = npr.rand(M, D)

    loss = np.sum(kernel.cross_cov(data1, data2))
    dloss = kernel.cross_cov_grad_data(data1, data2).sum(0)

    dloss_est = np.zeros(dloss.shape)
    for i in xrange(M):
        for j in xrange(D):
            data2[i, j] += eps
            loss_1 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] -= 2 * eps
            loss_2 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] += eps
            dloss_est[i, j] = ((loss_1 - loss_2) / (2 * eps))

    assert np.linalg.norm(dloss - dloss_est) < 1e-6
Example #11
def create_task():
    task_name = "mytask"
    task_type = "OBJECTIVE"

    variables_config = OrderedDict([('X', {
        "type": "INT",
        "size": 2,
        "min": -1,
        "max": 10
    }), ('Y', {
        "type": "FLOAT",
        "size": 3,
        "min": -0.003,
        "max": 1e-1
    }), ('Z', {
        "type": "ENUM",
        "size": 2,
        "options": ["one", "two", "three"]
    })])

    variables_meta, num_dims, cardinality = Task.variables_config_to_meta(
        variables_config)

    # Create a set of inputs that satisfies the constraints of each variable
    X = np.zeros((10, num_dims))
    for i in xrange(10):
        for name, variable in items(variables_meta):
            indices = variable['indices']
            if variable['type'] == 'int':
                X[i, indices] = np.random.randint(variable['min'],
                                                  variable['max'] + 1,
                                                  len(indices))
            elif variable['type'] == 'float':
                X[i, indices] = np.random.rand(len(indices)) * (
                    variable['max'] - variable['min']) + variable['min']
            elif variable['type'] == 'enum':
                for ind in indices:
                    cat = np.random.randint(len(ind))
                    X[i, ind[cat]] = 1

    y = np.random.randn(10)

    t = Task(task_name, task_type, variables_config, data=X, values=y)

    return t
def test_fit():
    npr.seed(1)

    N             = 10
    D             = 5
    burnin        = 100
    mcmc_iters    = 100
    num_pending   = 3
    num_fantasies = 2

    gp = GPClassifier(D, burnin=burnin, mcmc_iters=mcmc_iters, num_fantasies=num_fantasies)
    
    inputs     = np.vstack((0.1*npr.rand(N,D),npr.rand(N,D)))
    inputs[12] = np.ones(D)
    pending    = npr.rand(3,D)
    W          = npr.randn(D,1)
    vals       = (inputs - inputs.mean(0)).dot(W).flatten() > 0

    gp.fit(inputs, vals, pending)

    probs = np.zeros(inputs.shape[0])
    for i in xrange(gp.num_states):
        gp.set_state(i)
        probs += (gp.latent_values.value > 0) / float(mcmc_iters)

    assert np.all(probs[:N] < 0.5) and np.all(probs[N:] > 0.5)

    assert gp.values.shape[0] == 2*N + num_pending

    assert gp.values.shape[1] == 2

    assert gp.chain_length == burnin + mcmc_iters
    assert all([np.all(p.value != p.initial_value) for p in gp.params.values()])
    assert len(gp._cache_list) == mcmc_iters
    assert len(gp._hypers_list) == mcmc_iters
    assert len(gp._latent_values_list) == mcmc_iters
    assert len(gp._fantasy_values_list) == mcmc_iters
Example #13
    return cur_x, cur_llh
    # return (cur_x, funEvals['funevals']) if returnFunEvals else cur_x


if __name__ == '__main__':
    npr.seed(1)

    import pylab as pl
    import pymc

    D = 10
    fn = lambda x: -0.5 * np.sum(x**2)

    iters = 1000
    samps = np.zeros((iters, D))
    for ii in xrange(1, iters):
        samps[ii, :] = slice_sample(samps[ii - 1, :],
                                    fn,
                                    sigma=0.1,
                                    step_out=False,
                                    doubling_step=True,
                                    verbose=False)

    ll = -0.5 * np.sum(samps**2, axis=1)

    scores = pymc.geweke(ll)
    pymc.Matplot.geweke_plot(scores, 'test')

    pymc.raftery_lewis(ll, q=0.025, r=0.01)

    pymc.Matplot.autocorrelation(ll, 'test')
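
Beyond the pymc diagnostics, a quick sanity check (an assumption, not part of the source) is that samples of the isotropic Gaussian target above should have roughly zero mean and unit variance per dimension once early samples are discarded:

burn = iters // 10
print(np.mean(samps[burn:], axis=0))   # expect values near 0
print(np.var(samps[burn:], axis=0))    # expect values near 1
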
Example #14
def test_predict():
    npr.seed(1)

    N = 10
    Npend = 3
    Ntest = 2
    D = 5

    gp = GP(D, burnin=5, num_fantasies=7)
    pred = npr.rand(Ntest, D)

    # Test with 0 points
    mu, v = gp.predict(pred)
    np.testing.assert_allclose(mu,
                               0,
                               rtol=1e-7,
                               atol=0,
                               err_msg='',
                               verbose=True)
    np.testing.assert_allclose(v,
                               1 + 1e-6,
                               rtol=1e-7,
                               atol=0,
                               err_msg='',
                               verbose=True)

    #Test with 1 point
    X = np.zeros((1, D))
    W = npr.randn(D, 1)
    val = X.dot(W).flatten() + np.sqrt(1e-3) * npr.randn()

    gp.fit(X, val, fit_hypers=False)

    mu, v = gp.predict(pred)

    # Points closer to the origin will have less variance
    if np.linalg.norm(pred[0] - X) < np.linalg.norm(pred[1] - X):
        assert v[0] < v[1]
    else:
        assert v[0] > v[1]

    # Predict at the point itself
    mu, v = gp.predict(X)
    np.testing.assert_allclose(mu,
                               val,
                               rtol=1e-5,
                               atol=0,
                               err_msg='',
                               verbose=True)

    # Now let's make sure it doesn't break with more data and pending
    inputs = npr.rand(N, D)
    vals = inputs.dot(W).flatten() + np.sqrt(1e-3) * npr.randn(N)
    pending = npr.rand(Npend, D)

    gp.fit(inputs, vals, pending)

    mu, v = gp.predict(pred)

    # Now let's check the gradients
    eps = 1e-5

    mu, v, dmu, dv = gp.predict(pred, compute_grad=True)

    # The implied loss is np.sum(mu**2) + np.sum(v**2)
    dloss = 2 * (dmu * mu[:, np.newaxis, :]).sum(2) + 2 * (
        v[:, np.newaxis, np.newaxis] * dv).sum(2)

    dloss_est = np.zeros(dloss.shape)
    for i in xrange(Ntest):
        for j in xrange(D):
            pred[i, j] += eps
            mu, v = gp.predict(pred)
            loss_1 = np.sum(mu**2) + np.sum(v**2)
            pred[i, j] -= 2 * eps
            mu, v = gp.predict(pred)
            loss_2 = np.sum(mu**2) + np.sum(v**2)
            pred[i, j] += eps
            dloss_est[i, j] = ((loss_1 - loss_2) / (2 * eps))

    assert np.linalg.norm(dloss - dloss_est) < 1e-6
Example #15
def fast_chol_add(L, A): 
    U = L.T

    # Add a row and column to U
    # Assume that you can pass in a cholesky that's the same
    # size as the kernel (then the last row/col will be clobbered)
    if U.shape[0] < A.shape[0]:
        G = np.zeros(A.shape)
        G[:U.shape[0], :U.shape[1]] = U
        U = G
    
    (rows,cols) = A.shape

    isPosDef = 1
    j = rows-1
    try:
        code = \
        """
        double s = 0;
        for (int i=0; i<cols; i++) {
            s = A(i,j);
            for (int ind=0; ind<i; ind++)
                s -= U(ind,i) * U(ind,j);

            if (i == j) {
                if (s <= 0) {
                    isPosDef = 0;
                    U(i,i) = 0;
                }
                else {
                    U(i,i) = sqrt(s);
                }
            } else {
                if (U(i,i) > 0) {
                    U(i,j) = s / U(i,i);
                }
                else {
                    U(i,j) = 0;
                }
            }
        }
        """
        scipy.weave.inline(code, ['U','A','j','isPosDef','rows','cols'], \
                               type_converters=scipy.weave.converters.blitz, \
                               compiler='gcc')
    except:
        k = np.arange(cols)
        for i in xrange(cols):
            j = rows-1
            s = A[i,j] - np.dot(U[k[:i],i].T,U[k[:i],j])
            if i == j:
                if s <= 0:
                    isPosDef = 0
                    U[i,i] = 0
                else:
                    U[i,i] = np.sqrt(s)
            else:
                if U[i,i] > 0:
                    U[i,j] = s / U[i,i]
                else:
                    U[i,j] = 0

    L = U.T
    return L, isPosDef
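
A minimal usage sketch (an assumption, not from the source): given the Cholesky factor of the leading (n-1) x (n-1) block, fast_chol_add fills in the last row and column, and the result should match a full re-factorization.

import numpy as np

np.random.seed(0)
n = 6
M = np.random.randn(n, n)
A = M.dot(M.T) + n * np.eye(n)                    # symmetric positive definite

L_small = np.linalg.cholesky(A[:n - 1, :n - 1])   # factor of the leading block
L_full, isPosDef = fast_chol_add(L_small, A)      # grow by one row/column

assert isPosDef == 1
assert np.allclose(L_full, np.linalg.cholesky(A))
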
Example #16
    def geweke_correctness_test(self):
        print('Initiating Geweke Correctness test')
        # Note: the horseshoe prior on the noise will make the line slightly not straight
        # because we don't have the actual log pdf

        import matplotlib.pyplot as plt

        # First, check that all priors and models can be sampled from
        for param in self.hypers:
            if not hasattr(param.prior, 'sample'):
                print('Prior of param %s cannot be sampled from. Cannot perform the Geweke correctness test.' % param.name)
                return

        n = 10000 # number of samples # n = self.mcmc_iters
        statistic_of_interest = np.mean

        true_data = copy.copy(self.data) # reset this at the end

        # Case A: 
            # 1) Draw new hypers from priors
            # 2) Draw new data given hypers (**NOT** given hypers and data !!!!)
        caseA = np.zeros(n)
        for i in xrange(n):
            if i % 1000 == 0:
                print('Geweke Part A Sample %d/%d' % (i,n))
            for param in self.hypers:
                param.sample_from_prior()
            latent_y = self.sample_from_prior_given_hypers(self.data) # only inputs used
            
            # fants = latent_y
            fants = self.observation_model(latent_y)
            # self.noise.print_diagnostics()
            # print fants

            caseA[i] = statistic_of_interest(fants)

        # Case B:
            # 1) Resample all hypers one step given data
            # 2) Resample data given hypers
            # repeat a bunch of times
        caseB = np.zeros(n)
        for i in xrange(n):
            if i % 1000 == 0:
                print('Geweke Part B Sample %d/%d' % (i, n))
            # Take MCMC step on theta given data
            self.sampler.generate_sample() # data['inputs'] and data['values'] used

            # Resample data
            latent_y = self.sample_from_prior_given_hypers(self.data) # only data['inputs'] used

            # self.data['values'] = latent_y
            self.data['values'] = self.observation_model(latent_y)  # add noise
            # self.noise.print_diagnostics()
            # print self.data['values']

            caseB[i] = statistic_of_interest(self.data['values'])
        
        print(np.mean(caseA))
        print(np.std(caseA))
        print(np.mean(caseB))
        print(np.std(caseB))

        # Then, sort the sets A and B.
        caseA = np.sort(caseA)
        caseB = np.sort(caseB)

        # Then for each a in A, take the fraction of B smaller than it. 
        yAxis = np.zeros(n)
        for i in xrange(n):
            yAxis[i] = np.sum(caseB < caseA[i]) / float(n)

        xAxis = np.arange(n)/float(n)
        # Plot fractional index of a vs this fraction. 
        # Repeat for all a in A so number of points on graph is |A| ( = |B| )

        if not os.path.isdir('diagnostics'):
            os.mkdir('diagnostics')
        if not os.path.isdir('diagnostics/correctness'):
            os.mkdir('diagnostics/correctness')

        plt.figure(1)
        plt.clf()
        plt.plot(xAxis, yAxis, 'b')
        plt.plot(xAxis, xAxis, '--r')
        plt.title('Geweke test P-P plot with %d samples' % n)
        plt.savefig('diagnostics/correctness/GewekeCorrectness_%d_samples.pdf' % n)


        self.data = true_data
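
The P-P comparison at the end reduces to: sort both sample sets and, for each element of A, record the fraction of B below it; matched distributions give a roughly diagonal curve. A tiny stand-alone sketch with made-up data (not from the source):

import numpy as np

np.random.seed(0)
caseA = np.sort(np.random.randn(1000))   # two sample sets from the same distribution
caseB = np.sort(np.random.randn(1000))
yAxis = np.array([np.mean(caseB < a) for a in caseA])
xAxis = np.arange(len(caseA)) / float(len(caseA))
print(np.max(np.abs(yAxis - xAxis)))     # small when the two distributions match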