Example #1
def test_isinstance():
    def fun(x):
        assert ag_isinstance(x, dict)
        assert ag_isinstance(x, ag_dict)
        return x['x']
    fun({'x': 1.})
    grad(fun)({'x': 1.})
Example #2
    def compare_smoother_grads(lds):
        init_params, pair_params, node_params = lds

        symmetrize = make_unop(lambda x: (x + x.T)/2. if np.ndim(x) == 2 else x, tuple)

        messages, _ = natural_filter_forward_general(*lds)
        dotter = randn_like(natural_smoother_general(messages, *lds))

        def py_fun(messages):
            result = natural_smoother_general(messages, *lds)
            assert shape(result) == shape(dotter)
            return contract(dotter, result)

        dense_messages, _ = _natural_filter_forward_general(
            init_params, pair_params, node_params)
        def cy_fun(messages):
            result = _natural_smoother_general(messages, pair_params)
            result = result[0][:3], result[1], result[2]
            assert shape(result) == shape(dotter)
            return contract(dotter, result)

        result_py = py_fun(messages)
        result_cy = cy_fun(dense_messages)
        assert np.isclose(result_py, result_cy)

        g_py = grad(py_fun)(messages)
        g_cy = unpack_dense_messages(grad(cy_fun)(dense_messages))

        assert allclose(g_py, g_cy)
Example #3
    def unwrap(self, output, i, *args, **kwargs):

        if not hasattr(output, '__iter__'):

            def _wrap(*args, **kwargs):
                return self.func(*args, **kwargs)[i]
            
            dfunc = grad(_wrap)
            return dfunc(*args, **kwargs)

        elif isinstance(output, np.ndarray):
            shape = output.shape
            J = []
            axes = []

            for dimen in shape:
                axes.append(range(dimen))

            for idx in product(*axes):

                def _wrap(*args, **kwargs):
                    return self.func(*args, **kwargs)[i][idx]

                dfunc = grad(_wrap)

                J.append(dfunc(*args, **kwargs))

            if hasattr(J[0], "__iter__"):
                return list(map(list, zip(*J)))
            else:
                return J
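The loop above assembles a Jacobian entry by entry over the output indices; for array-valued functions autograd's own jacobian wrapper produces the same result in one call. A small self-contained sketch with an illustrative function (not taken from the class above):

import autograd.numpy as np
from autograd import jacobian

f = lambda x: np.array([np.sin(x[0]), x[0] * x[1]])
# 2x2 matrix of partial derivatives d f_i / d x_j
print(jacobian(f)(np.array([1.0, 2.0])))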
Example #4
def test_fast_conv_grad():
    skip = 1
    block_size = (11, 11)
    depth = 1
    img   = np.random.randn(51, 51, depth)
    filt  = np.dstack([cv.gauss_filt_2D(shape=block_size,sigma=2) for k in range(depth)])
    filt = cv.gauss_filt_2D(shape=block_size, sigma=2)
    def loss_fun(filt):
        out = fc.convolve(filt, img)
        return np.sum(np.sin(out) + out**2)
    loss_fun(filt)
    loss_grad = grad(loss_fun)

    def loss_fun_slow(filt):
        out = auto_convolve(img.squeeze(), filt, mode='valid') 
        return np.sum(np.sin(out) + out**2)
    loss_fun_slow(filt)
    loss_grad_slow = grad(loss_fun_slow)

    # compare gradient timing
    loss_grad_slow(filt)
    loss_grad(filt)

    ## check numerical gradients
    num_grad = np.zeros(filt.shape)
    for i in range(filt.shape[0]):
        for j in range(filt.shape[1]):
            de = np.zeros(filt.shape)
            de[i, j] = 1e-4
            num_grad[i,j] = (loss_fun(filt + de) - loss_fun(filt - de)) / (2*de[i,j])

    assert np.allclose(loss_grad(filt), num_grad), "convolution gradient failed!"
Example #5
def test_isinstance():
    def fun(x):
        assert ag_isinstance(x, tuple)
        assert ag_isinstance(x, ag_tuple)
        return x[0]
    fun((1., 2., 3.))
    grad(fun)((1., 2., 3.))
Example #6
def test_isinstance():
    def fun(x):
        assert ag_isinstance(x, list)
        assert ag_isinstance(x, ag_list)
        return x[0]
    fun([1., 2., 3.])
    grad(fun)([1., 2., 3.])
Example #7
def test_array_creation():
    # Will always pass, but will take ages (like a minute) if the complexity of
    # array creation is O(N)
    N = 100000
    def fun(x):
        arr = [x for i in range(N)]
        return np.sum(np.array(arr))
    grad(fun)(1.0)
Example #8
def test_sub():
    fun = lambda x, y : to_scalar(x - y)
    d_fun_0 = lambda x, y : to_scalar(grad(fun, 0)(x, y))
    d_fun_1 = lambda x, y : to_scalar(grad(fun, 1)(x, y))
    for arg1, arg2 in arg_pairs():
        check_grads(fun, arg1, arg2)
        check_grads(d_fun_0, arg1, arg2)
        check_grads(d_fun_1, arg1, arg2)
Example #9
def peakmem_needless_nodes():
    N, M = 1000, 100
    def fun(x):
        for i in range(M):
            x = x + 1
        return np.sum(x)

    grad(fun)(np.zeros((N, N)))
Example #10
def check_fft_n(fft_fun, D, n):
    def fun(x): return to_scalar(fft_fun(x, D + n))
    d_fun = lambda x : to_scalar(grad(fun)(x))
    mat = npr.randn(D, D)
    mat = match_complex(fft_fun, mat)
    assert_array_equal(grad(fun)(mat).shape, mat.shape)
    check_grads(fun, mat)
    check_grads(d_fun, mat)
Example #11
def test_return_both():
    fun = lambda x : 3.0 * np.sin(x)
    d_fun = grad(fun)
    f_and_d_fun = grad(fun, return_function_value=True)

    test_x = npr.randn()
    f, d = f_and_d_fun(test_x)
    assert f == fun(test_x)
    assert d == d_fun(test_x)
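The return_function_value=True keyword reflects an older autograd API; in current autograd releases the same behaviour is exposed as value_and_grad. A minimal sketch, assuming a recent autograd:

import autograd.numpy as np
from autograd import grad, value_and_grad

fun = lambda x: 3.0 * np.sin(x)
f, d = value_and_grad(fun)(0.5)   # returns (fun(0.5), grad(fun)(0.5))
assert np.isclose(f, fun(0.5)) and np.isclose(d, grad(fun)(0.5))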
Example #12
def fan_out_fan_in():
    """The 'Pearlmutter test' """
    def fun(x):
        for i in range(10**4):
            x = (x + x)/2.0
        return np.sum(x)

    with tictoc():
        grad(fun)(1.0)
Example #13
def test_add():
    fun = lambda x, y : to_scalar(x + y)
    d_fun_0 = lambda x, y : to_scalar(grad(fun, 0)(x, y))
    d_fun_1 = lambda x, y : to_scalar(grad(fun, 1)(x, y))
    for arg1, arg2 in arg_pairs():
        print(type(arg1), type(arg2))
        check_grads(fun, arg1, arg2)
        check_grads(d_fun_0, arg1, arg2)
        check_grads(d_fun_1, arg1, arg2)
Example #14
def check_binary_func(fun):
    x, y = 0.7, 1.8
    a = grad(fun)(x, y)
    b = nd(lambda x: fun(x, y), x)
    check_close(a, b)

    a = grad(fun, 1)(x, y)
    b = nd(lambda y: fun(x, y), y)
    check_close(a, b)
Example #15
def test_nested_higher_order():
    def outer_fun(x):
        def inner_fun(y):
            return y[0] * y[1]
        return np.sum(np.sin(np.array(grad(inner_fun)(ag_tuple((x,x))))))

    check_grads(outer_fun)(5.)
    check_grads(grad(outer_fun))(10.)
    check_grads(grad(grad(outer_fun)))(10.)
Example #16
def test_nograd():
    # we want this to raise non-differentiability error
    fun = lambda x: np.allclose(x, (x*3.0)/3.0)
    try:
        grad(fun)(np.array([1., 2., 3.]))
    except TypeError:
        pass
    else:
        raise Exception('Expected non-differentiability exception')
Example #17
def test_return_both():
    fun = lambda x : 3.0 * x**3.2
    d_fun = grad(fun)
    f_and_d_fun = grad(fun, return_function_value=True)

    test_x = 1.7
    f, d = f_and_d_fun(test_x)
    assert f == fun(test_x)
    assert d == d_fun(test_x)
Example #18
def test_third_derivative():
    fun = lambda x : np.sin(np.sin(x) + np.sin(x))
    df = grad(fun)
    ddf = grad(df)
    dddf = grad(ddf)
    check_grads(fun, npr.randn())
    check_grads(df, npr.rand())
    check_grads(ddf, npr.rand())
    check_grads(dddf, npr.rand())
Example #19
def test_third_derivative_other_args2():
    fun = lambda x, y : np.sin(np.sin(x) + np.sin(y))
    df = grad(fun, 1)
    ddf = grad(df)
    dddf = grad(ddf, 1)
    check_grads(fun, npr.randn(), npr.randn())
    check_grads(df, npr.randn(), npr.randn())
    check_grads(ddf, npr.randn(), npr.randn())
    check_grads(dddf, npr.randn(), npr.randn())
Example #20
def check_binary_func(fun, independent=False):
    with warnings.catch_warnings(record=independent) as w:
        x, y = 0.7, 1.8
        a = grad(fun)(x, y)
        b = nd(lambda x: fun(x, y), x)
        check_close(a, b)

        a = grad(fun, 1)(x, y)
        b = nd(lambda y: fun(x, y), y)
        check_close(a, b)
Example #21
def test_power_arg0():
    # the +1.'s here are to avoid regimes where numerical diffs fail
    make_fun = lambda y: lambda x: np.power(x, y)
    fun = make_fun(npr.randn()**2 + 1.)
    check_grads(fun)(npr.rand()**2 + 1.)

    # test y == 0. as a special case, c.f. #116
    fun = make_fun(0.)
    assert grad(fun)(0.) == 0.
    assert grad(grad(fun))(0.) == 0.
Example #22
def test_dtypes():
    def f(x):
        return np.sum(x**2)

    # Array y with dtype np.float32
    y = np.random.randn(10, 10).astype(np.float32)
    assert grad(f)(y).dtype.type is np.float32

    y = np.random.randn(10, 10).astype(np.float16)
    assert grad(f)(y).dtype.type is np.float16
Example #23
def test_pow():
    fun = lambda x, y : to_scalar(x ** y)
    d_fun_0 = lambda x, y : to_scalar(grad(fun, 0)(x, y))
    d_fun_1 = lambda x, y : to_scalar(grad(fun, 1)(x, y))
    make_positive = lambda x : np.abs(x) + 1.1 # Numeric derivatives fail near zero
    for arg1, arg2 in arg_pairs():
        arg1 = make_positive(arg1)
        check_grads(fun, arg1, arg2)
        check_grads(d_fun_0, arg1, arg2)
        check_grads(d_fun_1, arg1, arg2)
Example #24
def test_div():
    fun = lambda x, y : to_scalar(x / y)
    d_fun_0 = lambda x, y : to_scalar(grad(fun, 0)(x, y))
    d_fun_1 = lambda x, y : to_scalar(grad(fun, 1)(x, y))
    make_gap_from_zero = lambda x : np.sqrt(x **2 + 0.5)
    for arg1, arg2 in arg_pairs():
        arg1 = make_gap_from_zero(arg1)
        arg2 = make_gap_from_zero(arg2)
        check_grads(fun, arg1, arg2)
        check_grads(d_fun_0, arg1, arg2)
        check_grads(d_fun_1, arg1, arg2)
Example #25
def test_compute_stats_grad():
    F = make_unop(lambda x: np.require(x, np.double, 'F'), tuple)

    dotter = F(randn_like(compute_stats(Ex, ExxT, ExnxT, True)))
    g1 = grad(lambda x: contract(dotter, compute_stats(*x)))((Ex, ExxT, ExnxT, 1.))
    g2 = _compute_stats_grad(dotter)
    assert allclose(g1[:3], g2)

    dotter = F(randn_like(compute_stats(Ex, ExxT, ExnxT, False)))
    g1 = grad(lambda x: contract(dotter, compute_stats(*x)))((Ex, ExxT, ExnxT, 0.))
    g2 = _compute_stats_grad(dotter)
    assert allclose(g1[:3], g2)
Example #26
def test_mod():
    fun = lambda x, y : to_scalar(x % y)
    d_fun_0 = lambda x, y : to_scalar(grad(fun, 0)(x, y))
    d_fun_1 = lambda x, y : to_scalar(grad(fun, 1)(x, y))
    make_gap_from_zero = lambda x : np.sqrt(x **2 + 0.5)
    for arg1, arg2 in arg_pairs():
        if arg1 is not arg2:  # Gradient undefined at x == y
            arg1 = make_gap_from_zero(arg1)
            arg2 = make_gap_from_zero(arg2)
            check_grads(fun, arg1, arg2)
            check_grads(d_fun_0, arg1, arg2)
            check_grads(d_fun_1, arg1, arg2)
Example #27
def test_hess_vector_prod():
    npr.seed(1)
    randv = npr.randn(10)
    def fun(x):
        return np.sin(np.dot(x, randv))
    df = grad(fun)
    def vector_product(x, v):
        return np.sin(np.dot(v, df(x)))
    ddf = grad(vector_product)
    A = npr.randn(10)
    B = npr.randn(10)
    check_grads(fun, A)
    check_grads(vector_product, A, B)
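The same Hessian-vector product pattern can be written with nothing but nested grad calls; a minimal self-contained sketch (the function f below is illustrative, not taken from the test above):

import autograd.numpy as np
from autograd import grad

def hvp(f, x, v):
    # gradient of <grad f(x), v> with respect to x equals H(x) @ v
    return grad(lambda x_: np.dot(grad(f)(x_), v))(x)

f = lambda x: np.sin(np.dot(x, x))
x, v = np.ones(3), np.arange(3.0)
print(hvp(f, x, v))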
Example #28
def test_slices():
    def f(x):
        s = slice(None, -1, None)
        y = x[s]
        return y[0]

    grad(f)([1., 2., 3.])

    def f(x):
        y = x[1:3]
        return y[0]

    grad(f)([1., 2., 3.])
Example #29
def test_checkpoint_correctness():
    bar = lambda x, y: 2*x + y + 5
    checkpointed_bar = checkpoint(bar)
    foo = lambda x: bar(x, x/3.) + bar(x, x**2)
    foo2 = lambda x: checkpointed_bar(x, x/3.) + checkpointed_bar(x, x**2)
    assert np.allclose(foo(3.), foo2(3.))
    assert np.allclose(grad(foo)(3.), grad(foo2)(3.))

    baz = lambda *args: sum(args)
    checkpointed_baz = checkpoint(baz)
    foobaz = lambda x: baz(x, x/3.)
    foobaz2 = lambda x: checkpointed_baz(x, x/3.)
    assert np.allclose(foobaz(3.), foobaz2(3.))
    assert np.allclose(grad(foobaz)(3.), grad(foobaz2)(3.))
Example #30
def test_jacobian_against_stacked_grads():
    scalar_funs = [
        lambda x: np.sum(x ** 3),
        lambda x: np.prod(np.sin(x) + np.sin(x)),
        lambda x: grad(lambda y: np.exp(y) * np.tanh(x[0]))(x[1]),
    ]

    vector_fun = lambda x: np.array([f(x) for f in scalar_funs])

    x = npr.randn(5)
    jac = jacobian(vector_fun)(x)
    grads = [grad(f)(x) for f in scalar_funs]

    assert np.allclose(jac, np.vstack(grads))
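A related consistency check, sketched here under the assumption that hessian is imported from autograd alongside grad and jacobian: for a scalar-valued function the Hessian should agree with the Jacobian of its gradient.

import autograd.numpy as np
import autograd.numpy.random as npr
from autograd import grad, jacobian, hessian

f = lambda x: np.sum(np.sin(x) * x ** 2)
x = npr.randn(4)
assert np.allclose(hessian(f)(x), jacobian(grad(f))(x))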
Example #31
def burer_monteiro(target_choi,
                   n_decomp,
                   rank,
                   n_qubits,
                   initial_guess=None,
                   cfac_tol=1.):
    choi_dim = target_choi.shape[0]

    # expanding and flattening
    entries = choi_dim * rank

    def extract_matrix(x):
        mat_re = x[0:entries].reshape((rank, choi_dim))
        mat_im = x[entries:2 * entries].reshape((rank, choi_dim))
        return mat_re + 1j * mat_im

    matlen = 2 * entries

    def expand(x):
        arr_Y_pos = list()
        arr_Y_neg = list()
        arr_a_pos = list()
        arr_a_neg = list()
        for i in range(n_decomp):
            arr_Y_pos.append(extract_matrix(x[matlen * i:matlen * (i + 1)]))
        for i in range(n_decomp):
            arr_Y_neg.append(
                extract_matrix(x[matlen * (n_decomp + i):matlen *
                                 (n_decomp + i + 1)]))
        for i in range(n_decomp):
            arr_a_pos.append(x[matlen * 2 * n_decomp + i])
            arr_a_neg.append(x[matlen * 2 * n_decomp + n_decomp + i])
        return arr_Y_pos, arr_Y_neg, arr_a_pos, arr_a_neg

    def flatten_matrix(mat):
        mat_re = np.real(mat).flatten()
        mat_im = np.imag(mat).flatten()
        return [mat_re, mat_im]

    def flatten(arr_Y_pos, arr_Y_neg, arr_a_pos, arr_a_neg):
        tot_list = list()
        for Y in arr_Y_pos:
            tot_list += flatten_matrix(Y)
        for Y in arr_Y_neg:
            tot_list += flatten_matrix(Y)
        tot_list += arr_a_pos
        tot_list += arr_a_neg

        return np.hstack(tot_list)

    # optimization function
    def loss(x):
        arr_Y_pos, arr_Y_neg, arr_a_pos, arr_a_neg = expand(x)
        return np.sum(np.abs(arr_a_pos)) + np.sum(np.abs(arr_a_neg))

    def constraint(x):
        arr_Y_pos, arr_Y_neg, arr_a_pos, arr_a_neg = expand(x)
        arr_C_pos = list()
        arr_C_neg = list()

        def conj(z):
            return np.real(z) - 1j * np.imag(z)

        for i in range(n_decomp):
            arr_C_pos.append(conj(arr_Y_pos[i].T) @ arr_Y_pos[i])
            arr_C_neg.append(conj(arr_Y_neg[i].T) @ arr_Y_neg[i])

        retvec = np.array([])

        # TP constraint
        for i in range(n_decomp):
            pt = anp_partial_trace(arr_C_pos[i], [2**n_qubits, 2**n_qubits], 1)
            vec = (pt - arr_a_pos[i] * np.identity(2**n_qubits)).flatten()
            retvec = np.hstack([retvec, vec])

            pt = anp_partial_trace(arr_C_neg[i], [2**n_qubits, 2**n_qubits], 1)
            vec = (pt - arr_a_neg[i] * np.identity(2**n_qubits)).flatten()
            retvec = np.hstack([retvec, vec])

        # equality constraint
        C_sum = np.zeros_like(target_choi)
        for i in range(n_decomp):
            C_sum += arr_C_pos[i] - arr_C_neg[i]
        vec = (C_sum - target_choi).flatten()
        retvec = np.hstack([retvec, vec])

        # separate complex and real part
        retvec = np.hstack([np.real(retvec), np.imag(retvec)])
        return retvec

    constraint_jac = autograd.jacobian(constraint)
    constraint_hess = autograd.hessian(lambda x, v: np.dot(constraint(x), v),
                                       argnum=0)

    # initial guess
    res = minimize(
        lambda z: np.linalg.norm(
            np_partial_trace(target_choi, [2**n_qubits, 2**n_qubits], 1).data -
            z * np.eye(2**n_qubits)), [1.])
    scale = res.x
    #assert res.fun < 1e-6
    arr_Y_pos = list()
    arr_Y_neg = list()
    arr_a_pos = list()
    arr_a_neg = list()
    if initial_guess is not None:
        for i in range(n_decomp):
            arr_Y_pos.append(initial_guess["Y_pos"][i])
            arr_Y_neg.append(initial_guess["Y_neg"][i])
            arr_a_pos.append(initial_guess["a_pos"][i])
            arr_a_neg.append(initial_guess["a_neg"][i])
    else:
        for i in range(n_decomp):
            arr_Y_pos.append(scale * np.random.normal(size=(rank, choi_dim)) +
                             1j * scale *
                             np.random.normal(size=(rank, choi_dim)))
            arr_Y_neg.append(scale * np.random.normal(size=(rank, choi_dim)) +
                             1j * scale *
                             np.random.normal(size=(rank, choi_dim)))
            arr_a_pos.append(scale * np.random.uniform())
            arr_a_neg.append(scale * np.random.uniform())
    x0 = flatten(arr_Y_pos, arr_Y_neg, arr_a_pos, arr_a_neg)
    len_x0 = x0.shape[0]

    # check flatten+expand
    Yp, Yn, ap, an = expand(x0)
    for i in range(n_decomp):
        assert np.linalg.norm(arr_Y_pos[i] - Yp[i]) < 1e-10
        assert np.linalg.norm(arr_Y_neg[i] - Yn[i]) < 1e-10
        assert np.linalg.norm(arr_a_pos[i] - ap[i]) < 1e-10
        assert np.linalg.norm(arr_a_neg[i] - an[i]) < 1e-10

    # solve
    def new_loss(x):
        return np.sum(np.square(constraint(x)))

    new_loss_grad = autograd.grad(new_loss)

    lc_mat_dense = np.zeros((1, x0.shape[0]))
    lc_mat_dense[0, matlen * 2 * n_decomp:] = np.ones((n_decomp * 2))
    indices_x = np.zeros((n_decomp * 2))
    indices_y = list(range(matlen * 2 * n_decomp, x0.shape[0]))
    vals = np.ones((n_decomp * 2))
    lc_mat = scipy.sparse.csr_matrix((vals, (indices_x, indices_y)),
                                     shape=(1, x0.shape[0]))
    assert np.linalg.norm(lc_mat_dense - lc_mat.toarray()) < 1e-10

    if np.max(np.abs(constraint(x0))) < 1e-8 and np.abs(loss(x0) - 1.) < 1e-8:
        res = OptimizeResult()
        res.x = x0
    else:
        con = LinearConstraint(lc_mat, 1., cfac_tol)
        res = minimize(new_loss,
                       x0,
                       jac=new_loss_grad,
                       constraints=con,
                       options={
                           "verbose": 0,
                           "maxiter": 10000000,
                           "gtol": 1e-12,
                           "xtol": 1e-16
                       },
                       method='trust-constr')
        #assert np.max(np.abs(constraint(res.x))) < 1e-6

    # return
    arr_Y_pos, arr_Y_neg, arr_a_pos, arr_a_neg = expand(res.x)
    arr_C_pos = list()
    arr_C_neg = list()
    for i in range(n_decomp):
        arr_C_pos.append(np.conj(arr_Y_pos[i].T) @ arr_Y_pos[i])
        arr_C_neg.append(np.conj(arr_Y_neg[i].T) @ arr_Y_neg[i])

    return arr_a_pos + arr_a_neg, arr_C_pos + arr_C_neg
Example #32
def gradient(objective, argument):
    """
    Compute the gradient of 'objective' with respect to the first
    argument and return as a function.
    """
    return ad.grad(objective)
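A usage sketch of the wrapper above (the cost function is illustrative; note that the 'argument' parameter is not used by this implementation):

import autograd.numpy as np

cost = lambda x: np.sum(x ** 2)
grad_cost = gradient(cost, None)              # 'argument' is ignored here
print(grad_cost(np.array([1.0, 2.0, 3.0])))   # expected: [2. 4. 6.]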
Example #33
#etas = np.arange(-0.8, 1.2, 0.4)
#pts = np.array((3.,7.,15.,20.))
etas = np.array((-0.8, 0.8))
pts = np.array((3., 20.))
#phis = np.arange(-np.pi, np.pi+2.*np.pi/6.,2.*np.pi/6.)
#etas = np.array((-0.8,-0.4))
phis = np.array((-np.pi, np.pi))

x = defineState(len(etas) - 1, len(phis) - 1, datasetJ)

print "minimising"

xtol = np.finfo('float64').eps

grad = grad(nllJ)
hess = hessian(nllJ)

btol = 1.e-8

#lb = [0.999,0.999,0.999,0.999,-0.01,-0.01,-0.01,-0.01,-1e-4,-1e-4,-1e-4,-1e-4,0.]
lb = [0.999, -0.01, -1e-4, 0., 0.]

#ub = [1.001,1.001,1.001,1.001,0.01,0.01,0.01,0.01,1e-4,1e-4,1e-4,1e-4,100]
ub = [1.001, 0.01, 1e-4, 100., 1e9]

constraints = LinearConstraint(A=np.eye(x.shape[0]),
                               lb=lb,
                               ub=ub,
                               keep_feasible=True)
Example #34
    def fit(self, X, B, T, W=None):
        '''Fits the model.

        :param X: numpy matrix of shape :math:`k \\cdot n`
        :param B: numpy vector of shape :math:`n`
        :param T: numpy vector of shape :math:`n`
        :param W: (optional) numpy vector of shape :math:`n`
        '''

        if W is None:
            W = numpy.ones(len(X))
        X, B, T, W = (Z if type(Z) == numpy.ndarray else numpy.array(Z)
                      for Z in (X, B, T, W))
        keep_indexes = (T > 0) & (B >= 0) & (B <= 1) & (W >= 0)
        if sum(keep_indexes) < X.shape[0]:
            n_removed = X.shape[0] - sum(keep_indexes)
            warnings.warn('Warning! Removed %d/%d entries from inputs where '
                          'T <= 0 or B not 0/1 or W < 0' % (n_removed, len(X)))
            X, B, T, W = (Z[keep_indexes] for Z in (X, B, T, W))
        n_features = X.shape[1]

        # scipy.optimize and emcee force the parameters to be a vector:
        # (log k, log p, log sigma_alpha, log sigma_beta,
        #  a, b, alpha_1...alpha_k, beta_1...beta_k)
        # Generalized Gamma is a bit sensitive to the starting point!
        x0 = numpy.zeros(6 + 2 * n_features)
        x0[0] = +1 if self._fix_k is None else log(self._fix_k)
        x0[1] = -1 if self._fix_p is None else log(self._fix_p)
        args = (X, B, T, W, self._fix_k, self._fix_p, self._hierarchical,
                self._flavor)

        # Set up progressbar and callback
        bar = progressbar.ProgressBar(widgets=[
            progressbar.Variable('loss', width=15, precision=9), ' ',
            progressbar.BouncingBar(), ' ',
            progressbar.Counter(width=6), ' [',
            progressbar.Timer(), ']'
        ])

        def callback(LL, value_history=[]):
            value_history.append(LL)
            bar.update(len(value_history), loss=LL)

        # Define objective and use automatic differentiation
        f = lambda x: -generalized_gamma_loss(x, *args, callback=callback)
        jac = autograd.grad(lambda x: -generalized_gamma_loss(x, *args))

        # Find the maximum a posteriori of the distribution
        res = scipy.optimize.minimize(f,
                                      x0,
                                      jac=jac,
                                      method='SLSQP',
                                      options={'maxiter': 9999})
        if not res.success:
            raise Exception('Optimization failed with message: %s' %
                            res.message)
        result = {'map': res.x}

        # TODO: should not use fixed k/p as search parameters
        if self._fix_k:
            result['map'][0] = log(self._fix_k)
        if self._fix_p:
            result['map'][1] = log(self._fix_p)

        # Make sure we're in a local minimum
        gradient = jac(result['map'])
        gradient_norm = numpy.dot(gradient, gradient)
        if gradient_norm >= 1e-2 * len(X):
            warnings.warn('Might not have found a local minimum! '
                          'Norm of gradient is %f' % gradient_norm)

        # Let's sample from the posterior to compute uncertainties
        if self._ci:
            dim, = res.x.shape
            n_walkers = 5 * dim
            sampler = emcee.EnsembleSampler(
                nwalkers=n_walkers,
                dim=dim,
                lnpostfn=generalized_gamma_loss,
                args=args,
            )
            mcmc_initial_noise = 1e-3
            p0 = [
                result['map'] + mcmc_initial_noise * numpy.random.randn(dim)
                for i in range(n_walkers)
            ]
            n_burnin = 100
            n_steps = numpy.ceil(2000. / n_walkers)
            n_iterations = n_burnin + n_steps

            bar = progressbar.ProgressBar(max_value=n_iterations,
                                          widgets=[
                                              progressbar.Percentage(), ' ',
                                              progressbar.Bar(),
                                              ' %d walkers [' % n_walkers,
                                              progressbar.AdaptiveETA(), ']'
                                          ])
            for i, _ in enumerate(sampler.sample(p0, iterations=n_iterations)):
                bar.update(i + 1)
            result['samples'] = sampler.chain[:, n_burnin:, :] \
                                       .reshape((-1, dim)).T
            if self._fix_k:
                result['samples'][0, :] = log(self._fix_k)
            if self._fix_p:
                result['samples'][1, :] = log(self._fix_p)

        self.params = {
            k: {
                'k': exp(data[0]),
                'p': exp(data[1]),
                'a': data[4],
                'b': data[5],
                'alpha': data[6:6 + n_features].T,
                'beta': data[6 + n_features:6 + 2 * n_features].T,
            }
            for k, data in result.items()
        }
Example #35
    def get_batch_lower_bound(cur_params, iter):
        encoder_weights = combined_parser.get(cur_params, 'encoder weights')
        flow_params     = combined_parser.get(cur_params, 'flow params')
        decoder_weights = combined_parser.get(cur_params, 'decoder weights')

        cur_data = train_images[batch_idxs[iter]]
        mus, log_sigs = encoder(encoder_weights, cur_data)
        samples, entropy_estimates = flow_sampler(flow_params, mus, np.exp(log_sigs), rs)
        loglikes = decoder_log_like(decoder_weights, samples, cur_data)

        print "Iter", iter, "loglik:", np.mean(loglikes).value, \
            "entropy:", np.mean(entropy_estimates).value, "marg. like:", np.mean(entropy_estimates + loglikes).value
        return np.mean(entropy_estimates + loglikes)

    lb_grad = grad(get_batch_lower_bound)

    def callback(weights, iter, grad):
        #Generate samples
        num_samples = 100
        zs = rs.randn(num_samples, latent_dimension)
        samples = decoder(combined_parser.get(weights, 'decoder weights'), zs)
        fig = plt.figure(1)
        fig.clf()
        ax = fig.add_subplot(111)
        plot_images(samples, ax, ims_per_row=10)
        plt.savefig('samples.png')

    final_params = adam(lb_grad, combined_params, num_training_iters, callback=callback)

    finish_time = time.time()
Example #36
def test_angle_real():
    fun = lambda x: np.angle(x)
    d_fun = lambda x: grad(fun)(x)
    check_grads(fun)(npr.rand())
    check_grads(d_fun)(npr.rand())
Example #37
def build_branin_objective(D=100):
    obj_grad = grad(branin)
    obj_hvp = sliced_hvp(obj_grad)
    return D, branin, obj_grad, obj_hvp, {}
Example #38
def test_abs_complex():
    fun = lambda x: np.abs(x)
    d_fun = lambda x: grad(fun)(x)
    check_grads(fun)(1.1 + 1.2j)
    check_grads(d_fun)(1.1 + 1.3j)
Example #39
        t2 = targets * 2 - 1
        t2 = t2[:, np.newaxis, :]
        # Now t2 is -1 or 1, which makes the following form nice
        label_probabilities = -np.logaddexp(0, -unnormalized_logprobs * t2)
        return np.sum(label_probabilities, axis=-1)  # Sum across pixels.

    def batched_loss(params, iter):
        data_idx = batch_indices(iter)
        return neglogprob(params, train_images[data_idx, :])

    def neglogprob(params, data):
        return np.log(K) - logsumexp(bernoulli_log_density(data, params),
                                     axis=-1).mean()

    # Get gradient of objective using autograd.
    objective_grad = grad(batched_loss)

    def print_perf(params, iter, gradient):
        if iter % 30 == 0:
            save_images(sigmoid(params),
                        'results/4/thetas.png',
                        vmin=0.0,
                        vmax=1.0)
            print(batched_loss(params, iter))

    # The optimizers provided by autograd can optimize lists, tuples, or dicts of parameters.
    # You may use these optimizers for Q4, but implement your own gradient descent optimizer for Q3!
    optimized_params = adam(objective_grad,
                            theta,
                            step_size=0.2,
                            num_iters=10000,
Example #40
def test_polygamma():
    x = npr.randn()
    fun = lambda x: to_scalar(autograd.scipy.special.polygamma(0, x))
    d_fun = grad(fun)
    check_grads(fun, x)
    check_grads(d_fun, x)
Example #41
def test_yn():
    x = npr.randn()**2 + 0.2
    fun = lambda x: to_scalar(autograd.scipy.special.yn(2, x))
    d_fun = grad(fun)
    check_grads(fun, x)
    check_grads(d_fun, x)
Example #42
network_size = [2, 128, 128, 128, 1]
A = [sigmoid, sigmoid, sigmoid, identity]
network = simple_MLP(network_size, A)
layer_data = network.layer_data
L = network.L


# trial solution
def v(x, t, layer_data):
    input = np.array([x, t])
    return np.sin(np.pi * x) + x * (x - 1) * t * network.input_to_output(
        input, layer_data)


# applying the operator D := Dxx - Dt to the trial solution
v_xx = grad(grad(v, 0), 0)
v_t = grad(v, 1)


def Dv(x, t, layer_data):
    return v_xx(x, t, layer_data) - v_t(x, t, layer_data)


# cost function
def cost_function(domain, layer_data):
    x, t = domain[0], domain[1]
    Dv_eval = np.array([Dv(x_, t_, layer_data) for t_ in t for x_ in x])
    cost = np.dot(Dv_eval, Dv_eval)
    return cost / np.size(x) / np.size(x)
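The nested grad(grad(v, 0), 0) call above takes a second partial derivative with respect to x; a tiny self-contained sanity check of that pattern on a known function (illustrative, not part of the network code):

import autograd.numpy as np
from autograd import grad

f = lambda x: np.sin(x)
d2f = grad(grad(f))        # second derivative via nested grad
assert np.isclose(d2f(0.3), -np.sin(0.3))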

Example #43
        print ('b1', params[2][1][2])
        # print ('b', params[2][2])



        plt.cla()
        target_distribution = lambda x: np.exp(log_density(x))
        var_distribution    = lambda x: np.exp(variational_log_density(params, x))
        plot_isocontours(ax, target_distribution)
        plot_isocontours(ax, var_distribution, cmap=plt.cm.bone)
        ax.set_autoscale_on(False)


        # rs = npr.RandomState(0)
        # samples = variational_sampler(params, num_plotting_samples, rs)
        # plt.plot(samples[:, 0], samples[:, 1], 'x')

        plt.draw()
        plt.pause(1.0/30.0)

    print("Optimizing variational parameters...")
    variational_params = adam(grad(objective), init_var_params(D), step_size=0.1,
                              num_iters=2000, callback=callback)
Example #44
    def distance_from_target_image(smoke):
        return np.mean((target - smoke)**2)

    def convert_param_vector_to_matrices(params):
        vx = np.reshape(params[:(rows * cols)], (rows, cols))
        vy = np.reshape(params[(rows * cols):], (rows, cols))
        return vx, vy

    def objective(params):
        init_vx, init_vy = convert_param_vector_to_matrices(params)
        final_smoke = simulate(init_vx, init_vy, init_smoke,
                               simulation_timesteps)
        return distance_from_target_image(final_smoke)

    # Specify gradient of objective function using autograd.
    objective_with_grad = grad(objective, return_function_value=True)

    fig = plt.figure(figsize=(8, 8))
    ax = fig.add_subplot(111, frameon=False)

    def callback(params):
        init_vx, init_vy = convert_param_vector_to_matrices(params)
        simulate(init_vx, init_vy, init_smoke, simulation_timesteps, ax)

    print "Optimizing initial conditions..."
    result = minimize(objective_with_grad,
                      init_dx_and_dy,
                      jac=True,
                      method='CG',
                      options={
                          'maxiter': 25,
Example #45
 def d_fun(input_list):
     g = grad(fun)(input_list)
     A = np.sum(g[0])
     B = np.sum(np.sin(g[0]))
     C = np.sum(np.sin(g[1]))
     return A + B + C
Example #46
 def flow_eq_grad1(self, mf, pu, pd):
     return grad(self.flow_eq, 0)(mf, pu,
                                  pd), -1., 1., self.flow_eq(mf, pu, pd)
Example #47
def test_real_type():
    fun = lambda x: np.sum(np.real(x))
    df = grad(fun)
    assert np.isrealobj(df(2.0))
    assert np.iscomplexobj(df(1.0j))
Example #48
def logistic_predictions(weights, inputs):
    # Outputs probability of a label being true according to logistic model.
    return sigmoid(np.dot(inputs, weights))


def training_loss(weights):
    # Training loss is the negative log-likelihood of the training labels.
    preds = logistic_predictions(weights, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))


# Build a toy dataset.
inputs = np.array([[0.52, 1.12, 0.77], [0.88, -1.08, 0.15],
                   [0.52, 0.06, -1.30], [0.74, -2.49, 1.39]])
targets = np.array([True, True, False, True])

# Build a function that returns gradients of training loss using autograd.
training_gradient_fun = grad(training_loss)

# Check the gradients numerically, just to be safe.
weights = np.array([0.0, 0.0, 0.0])
quick_grad_check(training_loss, weights)

# Optimize weights using gradient descent.
print("Initial loss:", training_loss(weights))
for i in range(100):
    weights -= training_gradient_fun(weights) * 0.01

print("Trained loss:", training_loss(weights))
Example #49
def test_abs_real():
    fun = lambda x: np.abs(x)
    d_fun = lambda x: grad(fun)(x)
    check_grads(fun)(1.1)
    check_grads(d_fun)(2.1)
Example #50
    def __init__(self,
                 point_estimate,
                 demo_func,
                 data,
                 mut_rate=None,
                 length=1,
                 regime="long",
                 psd_rtol=1e-8,
                 **kwargs):
        """
        Parameters
        ----------
        point_estimate : array
                 a statistically consistent estimate for the true parameters.
                 confidence regions and hypothesis tests are computed for a (shrinking)
                 neighborhood around this point.
        demo_func : function that returns a Demography from parameters
        data : SegSites (or Sfs, if regime="many")
        regime : the limiting regime for the asymptotic confidence region
              if "long", number of loci is fixed, and the length of the loci -> infinity.
                 * uses time series information to estimate covariance structure
                 * requires isinstance(data, SegSites)
                 * loci should be independent. they don't have to be identically distributed
              if "many", the number of loci -> infinity
                 * loci should be independent, and roughly identically distributed
        psd_rtol: for checking if certain matrices (e.g. covariance matrices) are positive semidefinite
              if psd_rtol = epsilon, then we will consider a matrix positive semidefinite if its most
              negative eigenvalue has magnitude less than epsilon * most positive eigenvalue.
        **kwargs : additional arguments passed into composite_log_likelihood
        """
        if regime not in ("long", "many"):
            raise ValueError("Unrecognized regime '%s'" % regime)

        try:
            data = data.seg_sites
        except AttributeError:
            data = data

        if mut_rate is not None:
            mut_rate = mut_rate * length

        self.point = np.array(point_estimate)
        self.demo_func = demo_func
        self.data = data
        self.regime = regime
        self.kwargs = dict(kwargs)
        self.psd_rtol = psd_rtol

        self.score = autograd.grad(self.lik_fun)(self.point)
        self.score_cov = _observed_score_covariance(self.regime,
                                                    self.point,
                                                    self.data,
                                                    self.demo_func,
                                                    psd_rtol=self.psd_rtol,
                                                    mut_rate=mut_rate,
                                                    **self.kwargs)
        self.fisher = _observed_fisher_information(self.point,
                                                   self.data,
                                                   self.demo_func,
                                                   psd_rtol=self.psd_rtol,
                                                   assert_psd=False,
                                                   mut_rate=mut_rate,
                                                   **self.kwargs)
Example #51
def test_angle_complex():
    fun = lambda x: np.angle(x)
    d_fun = lambda x: grad(fun)(x)
    check_grads(fun)(npr.rand() + 1j * npr.rand())
    check_grads(d_fun)(npr.rand() + 1j * npr.rand())
Example #52
def test_drift_force(X, alpha):
    psi = SimpleGaussian(alpha)
    pool = SumPooling(psi)

    expected = 2 * grad(sum_pool_np, 0)(X, alpha) / sum_pool_np(X, alpha)
    assert_close(expected.ravel(), pool.drift_force(X))
Example #53
def main():
    num_iters = 10
    X, y = make_classification(
        100,
        n_classes=3,
        n_informative=3,
        n_redundant=0,
        n_clusters_per_class=2,
        n_features=20,
    )

    model = lgb.LGBMClassifier(
        boosting_type="gbdt", objective="binary", n_estimators=3, random_state=1
    )
    model.fit(X, y)

    model_dump = model.booster_.dump_model()
    trees_ = [m["tree_structure"] for m in model_dump["tree_info"]]

    # needs to infer from model.predict_proba? or labelbinarizer
    lb = LabelBinarizer()
    y_ohe = lb.fit_transform(y)
    nclass = y_ohe.shape[1]
    if nclass == 2:
        y_ohe = y

    if nclass > 2:
        trees = split_trees_by_classes(trees_, nclass)
        trees_params = multiclass_trees_to_param(X, y, trees)
        model_ = gbm_gen(
            trees_params[0], X, trees_params[2], trees_params[1], True, nclass
        )

        def training_loss(weights, idx=0):
            # Training loss is the negative log-likelihood of the training labels.
            preds = model_(weights, X)
            loglik = -np.sum(np.log(preds + 1e-7) * y_ohe)

            return loglik

    else:
        trees_params = multi_tree_to_param(X, y, trees_)
        model_ = gbm_gen(trees_params[0], X, trees_params[2], trees_params[1], False, 2)

        def training_loss(weights, idx=0):
            # Training loss is the negative log-likelihood of the training labels.
            preds = sigmoid(model_(weights, X))
            label_probabilities = preds * y + (1 - preds) * (1 - y)
            loglik = -np.sum(np.log(label_probabilities))

            return loglik

    # training the model and outputting results
    training_gradient_fun = grad(training_loss)
    param_ = adam(
        training_gradient_fun,
        trees_params[0],
        callback=simple_callback,
        step_size=0.05,
        num_iters=num_iters,
    )

    lgb_predict = model.predict_proba(X)
    if lgb_predict.shape[1] == 2:
        lgb_predict = lgb_predict[:, 1]

    results = {
        "train_base": roc_auc_score(y_ohe, model_(trees_params[0], X)),
        "train_nnet": roc_auc_score(y_ohe, model_(param_, X)),
        "train_lgb": roc_auc_score(y_ohe, lgb_predict),
    }
    return results
Example #54
    def optimize_latent_weighting_stochastic(self,
                                             exp_buffer,
                                             wb,
                                             task_steps,
                                             state_diffs=False,
                                             use_all_exp=False):
        """Learn the latent weights using gradients of the energy function with respect to the latent weights
		and performing minibatch updates via SGD (ADAM).
		
		Arguments:
		exp_buffer -- Either an ExperienceReplay object, or a list of transitions of a single instance;
		 	if use_all_exp==False, then an ExperienceReplay object must be supplied;
		 	otherwise, a list of transitions must be supplied (where each transition is a numpy array)
		wb -- the latent weights for the specific instance
		task_steps --total steps taken in environment

		Keyword Arguments:
		state_diffs -- boolean indicating if the BNN should predict state differences rather than the next state (default: False)
		use_all_exp -- boolean indicating whether updates should be performed using all experiences
		"""
        # Create gradient functional of the energy function wrt wb
        energy_grad = grad(self.simple_loss, argnum=2)
        # energy_grad = grad(self.energy, argnum=2)
        cur_latent_weights = wb
        m1 = 0
        m2 = 0
        beta1 = 0.9
        beta2 = 0.999
        epsilon = 1e-8
        t = 0
        # With linear top latent weights, use a single sample of the BNN network weights to compute gradients
        if self.linear_latent_weights:
            tmp_num_weight_samples = self.num_weight_samples
            self.num_weight_samples = 1
        for epoch in range(self.wb_opt_epochs):
            # Gather a sample of data from the experience buffer, convert to input and target arrays
            if use_all_exp:
                batch = exp_buffer
            else:
                batch, __, indices = exp_buffer.sample(task_steps)
            # batch: [state,self.__encode_action(action),reward,next_state]
            X = np.array([
                np.hstack([batch[tt, 0], batch[tt, 1]])
                for tt in range(len(batch))
            ])
            y = np.array([batch[tt, 3] for tt in range(len(batch))])
            if state_diffs:
                y = y - X[:, :batch[0, 0].shape[0]]
            self.N = X.shape[0]
            batch_idxs = self.__make_batches__()
            # Permute the indices of the training inputs for SGD purposes
            #permutation = np.random.permutation(X.shape[0])
            permutation = np.random.choice(range(X.shape[0]),
                                           X.shape[0],
                                           replace=False)
            for idxs in batch_idxs:
                t += 1
                grad_wb = energy_grad(self.weights, X[permutation[idxs]],
                                      cur_latent_weights, y[permutation[idxs]])
                # m1 = beta1*m1 + (1-beta1)*grad_wb
                # m2 = beta2*m2 + (1-beta2)*grad_wb**2
                # m1_hat = m1 / (1-beta1**t)
                # m2_hat = m2 / (1-beta2**t)
                # cur_latent_weights -= self.wb_learning_rate * m1_hat / (np.sqrt(m2_hat)+epsilon)
                cur_latent_weights -= self.wb_learning_rate * grad_wb
            # Re-queue sampled data with updated TD-error calculations
            X_latent_weights = np.vstack(
                [cur_latent_weights[0] for i in range(X.shape[0])])
            if not use_all_exp and exp_buffer.mem_priority:
                td_loss = self.get_td_error(np.hstack([X, X_latent_weights]),
                                            y, 0.0, 1.0)
                exp_buffer.update_priorities(
                    np.hstack((np.reshape(td_loss, (len(td_loss), -1)),
                               np.reshape(indices, (len(indices), -1)))))
        if self.linear_latent_weights:
            self.num_weight_samples = tmp_num_weight_samples
        return cur_latent_weights
Example #55
 def gradient_function(point):
     return projector(point, grad(cost)(point))
Example #56
    def fit_network(self,
                    exp_buffer,
                    task_weights,
                    task_steps,
                    state_diffs=False,
                    use_all_exp=False):
        """Learn BNN network weights using gradients of the energy function with respect to the network weights
		and performing minibatch updates via SGD (ADAM).
		
		Arguments:
		exp_buffer -- Either an ExperienceReplay object, or a list of transitions;
		 	if use_all_exp==False, then an ExperienceReplay object must be supplied;
		 	otherwise, a list of transitions must be supplied (where each transition is a numpy array)
		task_weights -- the latent weights: a numpy array of with dimensions (number of instances x number of latent weights)
		task_steps --total steps taken in environment

		Keyword Arguments:
		state_diffs -- boolean indicating if the BNN should predict state differences rather than the next state (default: False)
		use_all_exp -- boolean indicating whether updates should be performed using all experiences
		"""
        # Create gradient functional of the energy function wrt W
        energy_grad = grad(self.simple_loss, argnum=0)
        # energy_grad = grad(self.energy, argnum=0)
        weights = np.copy(self.weights)
        m1 = 0
        m2 = 0
        beta1 = 0.9
        beta2 = 0.999
        epsilon = 1e-8
        t = 0

        for epoch in range(self.train_epochs):
            # Gather a sample of data from the experience buffer, convert to input and target arrays
            if use_all_exp:
                batch = exp_buffer
            else:
                batch, __, indices = exp_buffer.sample(task_steps)
            # batch: [state,self.__encode_action(action),reward,next_state]
            X = np.array([
                np.hstack([batch[tt, 0], batch[tt, 1]])
                for tt in range(len(batch))
            ])
            wb = np.array(
                [task_weights[batch[tt, 4], :] for tt in range(len(batch))])
            y = np.array([batch[tt, 3] for tt in range(len(batch))])
            if state_diffs:
                y = y - X[:, :batch[0, 0].shape[0]]
            self.N = X.shape[0]
            batch_idxs = self.__make_batches__()
            # Permute the indices of the training inputs for SGD purposes
            permutation = np.random.permutation(X.shape[0])
            for idxs in batch_idxs:
                t += 1
                grad_w = energy_grad(weights, X[permutation[idxs]],
                                     wb[permutation[idxs]],
                                     y[permutation[idxs]])
                print("GRAD = ", grad_w)
                # m1 = beta1*m1 + (1-beta1)*grad_w
                # m2 = beta2*m2 + (1-beta2)*grad_w**2
                # m1_hat = m1 / (1-beta1**t)
                # m2_hat = m2 / (1-beta2**t)
                # weights = weights - self.learning_rate*m1_hat/(np.sqrt(m2_hat)+epsilon)
                weights = weights - self.learning_rate * grad_w
            # Re-queue sampled data with updated TD-error calculations
            self.weights = weights
            if (not use_all_exp) and exp_buffer.mem_priority:
                td_loss = self.get_td_error(np.hstack([X, wb]), y, 0.0, 1.0)
                exp_buffer.update_priorities(
                    np.hstack((np.reshape(td_loss, (len(td_loss), -1)),
                               np.reshape(indices, (len(indices), -1)))))
Example #57
def hamiltonian_monte_carlo(
    n_samples,
    negative_log_prob,
    initial_position,
    tune=500,
    path_len=1,
    initial_step_size=0.1,
):
    """Run Hamiltonian Monte Carlo sampling.

    Parameters
    ----------
    n_samples : int
        Number of samples to return
    negative_log_prob : callable
        The negative log probability to sample from
    initial_position : np.array
        A place to start sampling from.
    tune: int
        Number of iterations to run tuning
    path_len : float
        How long each integration path is. Smaller is faster and more correlated.
    initial_step_size : float
        How long each integration step is. This will be tuned automatically.

    Returns
    -------
    np.array
        Array of length `n_samples`.
    """
    initial_position = np.array(initial_position)
    # autograd magic
    dVdq = grad(negative_log_prob)

    # collect all our samples in a list
    samples = [initial_position]

    # Keep a single object for momentum resampling
    momentum = st.norm(0, 1)

    step_size = initial_step_size
    step_size_tuning = DualAveragingStepSize(step_size)
    # If initial_position is a 10d vector and n_samples is 100, we want 100 x 10 momentum draws
    # we can do this in one call to np.random.normal, and iterate over rows
    size = (n_samples + tune, ) + initial_position.shape[:1]
    for idx, p0 in tqdm(enumerate(momentum.rvs(size=size)), total=size[0]):
        # Integrate over our path to get a new position and momentum
        q_new, p_new = leapfrog(
            samples[-1],
            p0,
            dVdq,
            path_len=2 * np.random.rand() *
            path_len,  # We jitter the path length a bit
            step_size=step_size,
        )

        # Check Metropolis acceptance criterion
        start_log_p = np.sum(momentum.logpdf(p0)) - negative_log_prob(
            samples[-1])
        new_log_p = np.sum(momentum.logpdf(p_new)) - negative_log_prob(q_new)
        p_accept = min(1, np.exp(new_log_p - start_log_p))
        if np.random.rand() < p_accept:
            samples.append(q_new)
        else:
            samples.append(np.copy(samples[-1]))
        if idx < tune - 1:
            step_size, _ = step_size_tuning.update(p_accept)
        elif idx == tune - 1:
            _, step_size = step_size_tuning.update(p_accept)

    return np.array(samples[1 + tune:])
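The leapfrog integrator called above is not shown in this snippet; below is a standard sketch matching the call signature used here (an assumption about the missing helper, with an identity mass matrix), not necessarily the author's exact implementation:

import autograd.numpy as np

def leapfrog(q, p, dVdq, path_len, step_size):
    """Leapfrog integration of Hamiltonian dynamics (sketch)."""
    q, p = np.copy(q), np.copy(p)
    p -= step_size * dVdq(q) / 2               # half step for momentum
    for _ in range(int(path_len / step_size) - 1):
        q += step_size * p                     # full step for position
        p -= step_size * dVdq(q)               # full step for momentum
    q += step_size * p
    p -= step_size * dVdq(q) / 2               # final half step for momentum
    return q, -p                               # flip momentum for reversibility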
Example #58
def map_gpp_bnn(layer_sizes, nonlinearity=np.tanh,
                n_data=200, N_samples=10,
                L2_reg=0.1, noise_var=0.1):

    shapes = list(zip(layer_sizes[:-1], layer_sizes[1:]))
    N_weights = sum((m+1)*n for m, n in shapes)

    def unpack_params(params):
        mean, log_std = params[:N_weights], params[N_weights:]
        return mean, log_std

    def unpack_layers(weights):
        """ iterable that unpacks the weights into relevant tensor shapes for each layer"""
        num_weight_sets = len(weights)
        for m, n in shapes:
            yield weights[:, :m*n]     .reshape((num_weight_sets, m, n)),\
                  weights[:, m*n:m*n+n].reshape((num_weight_sets, 1, n))
            weights = weights[:, (m+1)*n:]

    def predictions(weights, inputs):
        """ implements the forward pass of the bnn
        weights | dim = [N_weight_samples, N_weights]
        inputs  | dim = [N_data]
        outputs | dim = [N_weight_samples, N_data, 1] """

        inputs = np.expand_dims(inputs, 0)
        for W, b in unpack_layers(weights):
            outputs = np.einsum('mnd,mdo->mno', inputs, W) + b
            inputs = nonlinearity(outputs)
        return outputs

    def sample_gpp(x, n_samples):
        """ Samples from the gp prior x = inputs with shape [N_data]
        returns : samples from the gp prior [N_data, N_samples] """
        x = np.ravel(x)
        n_data = len(x)
        K = covariance(x[:, None], x[:, None])
        L = cholesky(K + 1e-7 * np.eye(n_data))
        e = rs.randn(n_data, n_samples)
        return np.dot(L, e)

    def log_gp_prior(y_bnn, x):
        """ computes: the expectation value of the log of the gp prior :
        E [ log p_gp(f) ] where p_gp(f) = N(f|0,K) where f ~ p_BNN(f)
        = -0.5 * E [ (L^-1f)^T(L^-1f) ] + const; K = LL^T (cholesky decomposition)
        (we ignore constants for now as we are not optimizing the covariance hyper-params)

        bnn_weights                   |  dim = [N_weights_samples, N_weights]
        K = covariance/Kernel matrix  |  dim = [N_data, N_data] ; dim L = dim K
        y_bnn output of a bnn         |  dim = [N_data, N_weights_samples]
        returns : E[log p_gp(y)]      |  dim = [N_function_samples] """

        K = covariance(x, x)+noise_var*np.eye(len(x))   # shape [N_data, N_data]
        L = cholesky(K)                                 # K = LL^T ; shape L = shape K
        a = solve(L, y_bnn)                             # a = L^-1 y_bnn ; shape L^-1 y_bnn =
        log_gp = -0.5*np.mean(a**2, axis=0)             # Compute E [a^2]
        return log_gp

    def log_prob(weights, inputs, targets):
        """ computes log p(y,w) = log p(y|w)+ log p(w) with p(w) = N(w|0,I)

        weights:                  |  dim = [N_weight_samples, N_weights]
        preds = f                 |  dim = [N_weight_samples, N_data, 1]
        targets = y               |  dim = [N_data]

        log_prior = log p(w)      |  dim = [N_weights_samples]
        log_lik = log(y|w)        |  dim = [N_weights_samples] """


        log_prior = -L2_reg * np.sum(weights ** 2, axis=1)
        preds = predictions(weights, inputs)
        log_lik = -np.sum((preds - targets)**2, axis=1)[:, 0] / noise_var

        return log_prior + log_lik

    def gaussian_entropy(log_std):
        return 0.5 * N_weights * (1.0 + np.log(2*np.pi)) + np.sum(log_std)

    def elbo(var_param, x, y):
        """ Provides a stochastic estimate of the evidence lower bound
        ELBO = E_r(w) [log p(y,w)-r(w)]

        params          |   dim = [2*N_weights]
        mean, log_std   |   dim = [N_weights]
        ws              |   dim = [N_samples, N_weights]
        returns : ELBO  |   dim = [1] """

        mean, log_std = unpack_params(var_param)
        ws = rs.randn(N_samples, N_weights) * np.exp(log_std) + mean  # sample weights from r(w)
        return gaussian_entropy(log_std) + np.mean(log_prob(ws, x, y))  # ELBO

    def log_pys(thetas, ys, x):
        """ creates an array of log p(y) for each y in ys
        which are estimated by using the ELBO
        log p(y) => E_r(w) [ log p(y,w)-log r(w)]
        ys has shape [y_samples, N_data] """

        #  get E_r(w)[p(y,w) - r(w)] for each w, y
        elbos = np.array([elbo(theta, x, y) for theta, y in zip(thetas, ys)])

        return elbos

    def kl_objective(params_phi, params_theta, t):
        """
        Provides a stochastic estimate of the kl divergence
        kl[p(y)|p_GP(y)] = E_p(y) [log p(y) -log p_gp(y)]
                         = -H[ p(y) ] -E_p(y) [log p_gp(y)]
        using :
        params_phi        dim = [2*N_weights]
        params_theta      list of [2*N_weights] : the var params of each r(w|theta)

        phi_mean, phi_log_std  |  dim = [N_weights]

        w_phi        |  dim = [N_samples, N_weights]
        y_bnn        |  dim = [N_data, N_weights_samples]

        kl             |  dim = [1] """

        phi_mean, phi_log_std = unpack_params(params_phi)
        w_phi = rs.randn(N_samples, N_weights) * np.exp(phi_log_std) + phi_mean
        x = np.random.uniform(low=-10, high=10, size=(n_data, 1))  # X ~ p(X)

        f_bnn = predictions(w_phi, x)[:, :, 0].T  # shape [N_data, N_weights_samples] f ~ p(f)
        y_bnn = f_bnn + 3*noise_var*rs.randn(n_data, N_samples)  # y = f + e ; y ~ p(y)

        # use monte carlo to approx H[p(y)] = E_p(y)[ log p(y)]
        entropy = np.mean(log_pys(params_theta, y_bnn.T, x))

        # use monte carlo to approx E_p(y) [log p_gp(y)]
        expected_log_gpp = np.mean(log_gp_prior(y_bnn, x))

        kl_div = entropy - expected_log_gpp

        return kl_div  # the KL

    grad_kl = grad(kl_objective, argnum=(0, 1))

    return N_weights, predictions, sample_gpp, unpack_params, kl_objective, grad_kl
Example #59
def prepare_loss_node(loss, opt_args_ls=None):
    if global_settings.backend == 'autograd':
        return ag.grad(loss, opt_args_ls)
    elif global_settings.backend == 'pytorch':
        return loss
Example #60
ub_sigma = np.full((nEtaBins, nEtaBins, nPtBins, nPtBins), 10).flatten()
ub_nsig = np.full((nEtaBins, nEtaBins, nPtBins, nPtBins), 20.).flatten()

ub_scale[idx] = np.full(len(idx), 1.)
ub_sigma[idx] = np.full(len(idx), -3.5)
ub_nsig[idx] = np.full(len(idx), 6.9)

lb = np.concatenate((lb_scale, lb_sigma, lb_nsig), axis=None)
ub = np.concatenate((ub_scale, ub_sigma, ub_nsig), axis=None)

constraints = LinearConstraint(A=np.eye(x.shape[0]),
                               lb=lb,
                               ub=ub,
                               keep_feasible=True)

grad = grad(nll)
hess = hessian(nll)

res = minimize(nll, x, args=(nEtaBins,nPtBins,datasetJ,datasetJgen),\
 method = 'trust-constr',jac = grad, hess=SR1(), constraints = constraints,\
 options={'verbose':3,'disp':True,'maxiter' : 100000, 'gtol' : 0., 'xtol' : xtol, 'barrier_tol' : btol})

print(res)

good_idx = np.where((np.sum(datasetJgen, axis=2) > 1000.).flatten())[0]

sep = nEtaBins * nEtaBins * nPtBins * nPtBins
good_idx = np.concatenate((good_idx, good_idx + sep, good_idx + 2 * sep),
                          axis=None)

fitres = res.x[good_idx]