Example #1
def test_hvp():
    fun = lambda a: np.sum(np.sin(a))
    a = npr.randn(5)
    v = npr.randn(5)
    H = hessian(fun)(a)
    hvp = make_hvp(fun)(a)[0]
    check_equivalent(np.dot(H, v), hvp(v))
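
# Editorial sketch (not part of the original test): make_hvp builds the product
# H @ v without materializing H. The same quantity can be written as the gradient
# of the directional derivative; this assumes autograd.numpy as np and autograd's
# grad are in scope, as the test above implies.
def manual_hvp(fun, x, v):
    from autograd import grad
    # d/dx [ grad(fun)(x) . v ] = H(x) @ v for scalar-valued fun
    return grad(lambda x_: np.dot(grad(fun)(x_), v))(x)
# manual_hvp(fun, a, v) should match hvp(v) and np.dot(H, v) above.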
Example #2
def test_hessian():
    # Check Hessian of a quadratic function.
    D = 5
    H = npr.randn(D, D)
    def fun(x):
        return np.dot(np.dot(x, H),x)
    hess = hessian(fun)
    x = npr.randn(D)
    check_equivalent(hess(x), H + H.T)
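
# Editorial note (not from the original test): for f(x) = x . (H x), the gradient is
# (H + H.T) @ x and the Hessian is the constant matrix H + H.T, which is exactly the
# identity the check above verifies.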
Example #3
    print "Gap percentiles [1, 50, 99] %s" % str(
        np.percentile(gaps, [1, 50, 99]))

    #########################################
    # test per mu_n function and gradient   #
    #########################################
    n = 0
    lbn, lbs = make_lower_bound_MoGn(theta, n, s2min=1e-7)
    thn = theta[n, :D]
    assert np.isclose(lower_bound_MoG(theta), lbn(thn)), "per n is bad"
    from autograd.util import quick_grad_check, nd
    quick_grad_check(lbn, thn)

    print "Hessiandiag, numeric hessian diag"
    hlbn = hessian(lbn)
    print np.diag(hlbn(thn))

    hdiag = numeric_hessian_diag(lbn, thn)
    print hdiag

    #####################################
    # Test NVPI on a small, 2d example  #
    #####################################
    from vbproj.vboost import mog
    means = np.array([[1., 1.], [-1., -1.], [-1, 1]])
    covs = np.array([2 * np.eye(2), 1 * np.eye(2), 1 * np.eye(2)])
    icovs = np.array([np.linalg.inv(c) for c in covs])
    lndets = np.array([np.linalg.slogdet(c)[1] for c in covs])
    pis = np.ones(means.shape[0]) / float(means.shape[0])
    lnpdf = lambda z: mog.mog_logprob(z, means, icovs, lndets, pis)
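
# Editorial sketch (hedged): numeric_hessian_diag is not shown in this excerpt. A
# central-difference approximation of the Hessian diagonal might look like the
# following; this is an illustration, not the original implementation.
def numeric_hessian_diag_sketch(f, x, eps=1e-4):
    import numpy as onp
    x = onp.asarray(x, dtype=float)
    diag = onp.zeros_like(x)
    f0 = f(x)
    for i in range(x.size):
        e = onp.zeros_like(x)
        e[i] = eps
        # second central difference along coordinate i
        diag[i] = (f(x + e) - 2.0 * f0 + f(x - e)) / eps**2
    return diag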
Example #4
def draw_it(func, **kwargs):
    view = [10, 150]
    if 'view' in kwargs:
        view = kwargs['view']

    # generate input space for plotting
    w_in = np.linspace(-5, 5, 100)
    w1_vals, w2_vals = np.meshgrid(w_in, w_in)
    w1_vals.shape = (len(w_in)**2, 1)
    w2_vals.shape = (len(w_in)**2, 1)
    w_vals = np.concatenate((w1_vals, w2_vals), axis=1).T
    w1_vals.shape = (len(w_in), len(w_in))
    w2_vals.shape = (len(w_in), len(w_in))

    # compute grad vals
    grad = compute_grad(func)
    grad_vals = [grad(s) for s in w_vals.T]
    grad_vals = np.asarray(grad_vals)

    # compute hessian
    hess = hessian(func)
    hess_vals = [hess(s) for s in w_vals.T]

    # define figure
    fig = plt.figure(figsize=(9, 6))

    ###  plot original function ###
    ax1 = plt.subplot2grid((3, 6), (0, 3), colspan=1, projection='3d')

    # evaluate function, reshape
    g_vals = func(w_vals)
    g_vals.shape = (len(w_in), len(w_in))

    # plot function surface
    ax1.plot_surface(w1_vals,
                     w2_vals,
                     g_vals,
                     alpha=0.1,
                     color='w',
                     zorder=1,
                     rstride=15,
                     cstride=15,
                     linewidth=0.5,
                     edgecolor='k')
    ax1.set_title(r'$g(w_1,w_2)$', fontsize=10)

    # cleanup axis
    cleanup(g_vals, view, ax1)

    ### plot first derivative functions ###
    ax2 = plt.subplot2grid((3, 6), (1, 2), colspan=1, projection='3d')
    ax3 = plt.subplot2grid((3, 6), (1, 4), colspan=1, projection='3d')

    # plot first function
    grad_vals1 = grad_vals[:, 0]
    grad_vals1.shape = (len(w_in), len(w_in))
    ax2.plot_surface(w1_vals,
                     w2_vals,
                     grad_vals1,
                     alpha=0.1,
                     color='w',
                     zorder=1,
                     rstride=15,
                     cstride=15,
                     linewidth=0.5,
                     edgecolor='k')
    ax2.set_title(r'$\frac{\partial}{\partial w_1}g(w_1,w_2)$', fontsize=10)

    # cleanup axis
    cleanup(grad_vals1, view, ax2)

    # plot second
    grad_vals1 = grad_vals[:, 1]
    grad_vals1.shape = (len(w_in), len(w_in))
    ax3.plot_surface(w1_vals,
                     w2_vals,
                     grad_vals1,
                     alpha=0.1,
                     color='w',
                     zorder=1,
                     rstride=15,
                     cstride=15,
                     linewidth=0.5,
                     edgecolor='k')
    ax3.set_title(r'$\frac{\partial}{\partial w_2}g(w_1,w_2)$', fontsize=10)

    # cleanup axis
    cleanup(grad_vals1, view, ax3)

    ### plot second derivatives ###
    ax4 = plt.subplot2grid((3, 6), (2, 1), colspan=1, projection='3d')
    ax5 = plt.subplot2grid((3, 6), (2, 3), colspan=1, projection='3d')
    ax6 = plt.subplot2grid((3, 6), (2, 5), colspan=1, projection='3d')

    # plot first hessian function
    hess_vals1 = np.asarray([s[0, 0] for s in hess_vals])
    hess_vals1.shape = (len(w_in), len(w_in))
    ax4.plot_surface(w1_vals,
                     w2_vals,
                     hess_vals1,
                     alpha=0.1,
                     color='w',
                     zorder=1,
                     rstride=15,
                     cstride=15,
                     linewidth=0.5,
                     edgecolor='k')
    ax4.set_title(
        r'$\frac{\partial}{\partial w_1}\frac{\partial}{\partial w_1}g(w_1,w_2)$',
        fontsize=10)

    # cleanup axis
    cleanup(hess_vals1, view, ax4)

    # plot second hessian function
    hess_vals1 = np.asarray([s[1, 0] for s in hess_vals])
    hess_vals1.shape = (len(w_in), len(w_in))
    ax5.plot_surface(w1_vals,
                     w2_vals,
                     hess_vals1,
                     alpha=0.1,
                     color='w',
                     zorder=1,
                     rstride=15,
                     cstride=15,
                     linewidth=0.5,
                     edgecolor='k')
    ax5.set_title(
        r'$\frac{\partial}{\partial w_1}\frac{\partial}{\partial w_2}g(w_1,w_2)=\frac{\partial}{\partial w_2}\frac{\partial}{\partial w_1}g(w_1,w_2)$',
        fontsize=10)

    # cleanup axis
    cleanup(hess_vals1, view, ax5)

    # plot third hessian function
    hess_vals1 = np.asarray([s[1, 1] for s in hess_vals])
    hess_vals1.shape = (len(w_in), len(w_in))
    ax6.plot_surface(w1_vals,
                     w2_vals,
                     hess_vals1,
                     alpha=0.1,
                     color='w',
                     zorder=1,
                     rstride=15,
                     cstride=15,
                     linewidth=0.5,
                     edgecolor='k')
    ax6.set_title(
        r'$\frac{\partial}{\partial w_2}\frac{\partial}{\partial w_2}g(w_1,w_2)$',
        fontsize=10)

    # cleanup axis
    cleanup(hess_vals1, view, ax6)
    plt.show()
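
# Editorial usage sketch (not part of the original; assumes compute_grad, hessian,
# cleanup and the plotting imports used above are available in the calling scope):
# draw_it(lambda w: np.sum(w**2, axis=0), view=[20, 120])
# The function must accept both single points of shape (2,) and the (2, N) grid
# that draw_it builds internally, hence the explicit axis=0 reduction.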
Example #5
def test_hessian_tensor_product():
    fun = lambda a: np.sum(np.sin(a))
    a = npr.randn(5, 4, 3)
    V = npr.randn(5, 4, 3)
    H = hessian(fun)(a)
    check_equivalent(np.tensordot(H, V, axes=np.ndim(V)), hessian_vector_product(fun)(a, V))
Example #6
def test_hessian_vector_product():
    fun = lambda a: np.sum(np.sin(a))
    a = npr.randn(5)
    v = npr.randn(5)
    H = hessian(fun)(a)
    check_equivalent(np.dot(H, v), hessian_vector_product(fun)(a, v))
Example #7
def test_hessian_matrix_product():
    fun = lambda a: np.sum(np.sin(a))
    a = npr.randn(5, 4)
    V = npr.randn(5, 4)
    H = hessian(fun)(a)
    check_equivalent(np.tensordot(H, V), hessian_tensor_product(fun)(a, V))
    def optimize(W0, compute_hessian=False):
        def compute_fprime_(Eta, Xi, s02):
            return fprime_m(Eta, compute_var(Xi, s02)) * Xi

        def compute_f_(Eta, Xi, s02):
            return pop_rate_fn(Eta, compute_var(Xi, s02))

        def compute_us(W, fval, fprimeval):
            W0x, W0y, W1x, W1y, W2x, W2y, W3x, W3y, s02, k0, k1, k2, k3, kappa, T0, T1, T2, T3, XX, XXp, Eta, Xi, h = parse_W(
                W)
            u0 = u_fn(XX, fval, W0x, W0y, k0, kappa, T0)
            u1 = u_fn(XX, fval, W1x, W1y, k0, kappa, T0) + u_fn(
                XX, fval, W0x, W0y, k1, kappa, T0) + u_fn(
                    XX, fval, W0x, W0y, k0, kappa, T1)
            u2 = u_fn(XXp, fprimeval, W2x, W2y, k0, kappa, T0) + u_fn(
                XXp, fprimeval, W0x, W0y, k2, kappa, T0) + u_fn(
                    XXp, fprimeval, W0x, W0y, k0, kappa, T2)
            u3 = u_fn(XXp, fprimeval, W3x, W3y, k0, kappa, T0) + u_fn(
                XXp, fprimeval, W0x, W0y, k3, kappa, T0) + u_fn(
                    XXp, fprimeval, W0x, W0y, k0, kappa, T3)
            return u0, u1, u2, u3

        def compute_f_fprime_t_(W,
                                perturbation,
                                max_dist=1):  # max dist added 10/14/20
            W0x, W0y, W1x, W1y, W2x, W2y, W3x, W3y, s02, k0, k1, k2, k3, kappa, T0, T1, T2, T3, XX, XXp, Eta, Xi, h = parse_W(
                W)
            fval = compute_f_(Eta, Xi, s02)
            fprimeval = compute_fprime_(Eta, Xi, s02)
            u0, u1, u2, u3 = compute_us(W, fval, fprimeval)
            resEta = Eta - u0 - u2
            resXi = Xi - u1 - u3
            YY = fval + perturbation
            YYp = fprimeval + 0

            def dYYdt(YY, Eta1, Xi1):
                return -YY + compute_f_(Eta1, Xi1, s02)

            def dYYpdt(YYp, Eta1, Xi1):
                return -YYp + compute_fprime_(Eta1, Xi1, s02)

            for t in range(niter):
                if np.mean(np.abs(YY - fval)) < max_dist:
                    u0, u1, u2, u3 = compute_us(W, YY, YYp)
                    Eta1 = resEta + u0 + u2
                    Xi1 = resXi + u1 + u3
                    YY = YY + dt * dYYdt(YY, Eta1, Xi1)
                    YYp = YYp + dt * dYYpdt(YYp, Eta1, Xi1)
                elif np.remainder(t, 500) == 0:
                    print('unstable fixed point?')

            #YYprime = compute_fprime_(Eta1,Xi1,s02)

            return YY, YYp

        def compute_f_fprime_t_avg_(W, perturbation, burn_in=0.5, max_dist=1):
            W0x, W0y, W1x, W1y, W2x, W2y, W3x, W3y, s02, k0, k1, k2, k3, kappa, T0, T1, T2, T3, XX, XXp, Eta, Xi, h = parse_W(
                W)
            fval = compute_f_(Eta, Xi, s02)
            fprimeval = compute_fprime_(Eta, Xi, s02)
            u0, u1, u2, u3 = compute_us(W, fval, fprimeval)
            resEta = Eta - u0 - u2
            resXi = Xi - u1 - u3
            YY = fval + perturbation
            YYp = fprimeval + 0

            YYmean = np.zeros_like(Eta)
            YYprimemean = np.zeros_like(Eta)

            def dYYdt(YY, Eta1, Xi1):
                return -YY + compute_f_(Eta1, Xi1, s02)

            def dYYpdt(YYp, Eta1, Xi1):
                return -YYp + compute_fprime_(Eta1, Xi1, s02)

            for t in range(niter):
                if np.mean(np.abs(YY - fval)) < max_dist:
                    u0, u1, u2, u3 = compute_us(W, YY, YYp)
                    Eta1 = resEta + u0 + u2
                    Xi1 = resXi + u1 + u3
                    YY = YY + dt * dYYdt(YY, Eta1, Xi1)
                    YYp = YYp + dt * dYYpdt(YYp, Eta1, Xi1)
                elif np.remainder(t, 500) == 0:
                    print('unstable fixed point?')
                if t > niter * burn_in:
                    YYmean = YYmean + 1 / niter / burn_in * YY
                    YYprimemean = YYprimemean + 1 / niter / burn_in * YYp

            return YYmean, YYprimemean

        def u_fn(XX, YY, Wx, Wy, K, kappa, T):
            WWx, WWy = [gen_Weight(W, K, kappa, T) for W in [Wx, Wy]]
            return XX @ WWx + YY @ WWy

        def minusLW(W):
            def compute_sq_error(a, b, wt):
                return np.sum(wt * (a - b)**2)

            def compute_kl_error(mu_data, pc_list, mu_model, fprimeval, wt):
                # how to model variability in X?
                kl = compute_kl_divergence(fprimeval, noise, mu_data, mu_model,
                                           pc_list)
                return kl  #wt*kl
                # principled way would be to use 1/wt for noise term. Should add later.

            def compute_opto_error(W, wt=None):
                if wt is None:
                    wt = np.ones((nN, nQ * nS * nT))
                W0x, W0y, W1x, W1y, W2x, W2y, W3x, W3y, s02, k0, k1, k2, k3, kappa, T0, T1, T2, T3, XX, XXp, Eta, Xi, h = parse_W(
                    W)
                WWy = gen_Weight(W0y, k0, kappa, T0)
                Phi = fprime_m(Eta, compute_var(Xi, s02))
                dHH = np.zeros((nN, nQ * nS * nT))
                dHH[:, np.arange(2, nQ * nS * nT, nQ)] = 1
                dHH = dHH * h
                print('dYY: ' + str(dYY.shape))
                print('Phi: ' + str(Phi.shape))
                print('dHH: ' + str(dHH.shape))
                cost = np.sum(wt * (dYY - (dYY @ WWy) * Phi - dHH * Phi)**2)
                return cost

            def compute_opto_error_with_inv(W, wt):
                if wt is None:
                    wt = np.ones((nN, nQ * nS * nT))
                W0x, W0y, W1x, W1y, W2x, W2y, W3x, W3y, s02, k0, k1, k2, k3, kappa, T0, T1, T2, T3, XX, XXp, Eta, Xi, h = parse_W(
                    W)
                WWy = gen_Weight(W0y, k0, kappa, T0)
                Phi = fprime_m(Eta, compute_var(Xi, s02))
                Phi1 = np.array([np.diag(phi) for phi in Phi])
                invmat = np.array([
                    np.linalg.inv(np.eye(nQ * nS * nT) - WWy @ phi1)
                    for phi1 in Phi1
                ])
                dHH = np.zeros((nN, nQ * nS * nT))
                dHH[:, np.arange(2, nQ * nS * nT, nQ)] = 1
                dHH = dHH * h
                print('dYY: ' + str(dYY.shape))
                print('Phi: ' + str(Phi.shape))
                print('dHH: ' + str(dHH.shape))
                invprod = np.einsum('ij,ijk->ik', dHH, Phi1)
                invprod = np.einsum('ij,ijk->ik', invprod, invmat)
                cost = np.sum(wt * (dYY - invprod)**2)
                return cost

            def compute_isn_error(W):
                W0x, W0y, W1x, W1y, W2x, W2y, W3x, W3y, s02, k0, k1, k2, k3, kappa, T0, T1, T2, T3, XX, XXp, Eta, Xi, h = parse_W(
                    W)
                Phi = fprime_m(Eta, compute_var(Xi, s02))
                log_arg = Phi[:, 0] * W0y[0, 0] - 1
                cost = utils.minus_sum_log_ceil(log_arg, big_val / nN)
                return cost

            def compute_tv_error(W):
                # sq l2 norm for tv error
                W0x, W0y, W1x, W1y, W2x, W2y, W3x, W3y, s02, k0, k1, k2, k3, kappa, T0, T1, T2, T3, XX, XXp, Eta, Xi, h = parse_W(
                    W)
                topo_var_list = [arr.reshape(topo_shape+(-1,)) for arr in \
                        [XX,XXp,Eta,Xi]]
                sqdiffy = [
                    np.sum(np.abs(np.diff(top, axis=0))**2)
                    for top in topo_var_list
                ]
                sqdiffx = [
                    np.sum(np.abs(np.diff(top, axis=1))**2)
                    for top in topo_var_list
                ]
                cost = np.sum(sqdiffy + sqdiffx)
                return cost

            W0x, W0y, W1x, W1y, W2x, W2y, W3x, W3y, s02, k0, k1, k2, k3, kappa, T0, T1, T2, T3, XX, XXp, Eta, Xi, h = parse_W(
                W)

            perturbation = perturbation_size * np.random.randn(*Eta.shape)

            fval, fprimeval = compute_f_fprime_t_avg_(
                W, perturbation
            )  # Eta the mean input per cell, Xi the stdev. input per cell, s02 the baseline variability in input

            Xterm = compute_kl_error(XXhat, Xpc_list, XX, XXp, wtStim *
                                     wtInp)  # XX the modeled input layer (L4)
            Yterm = compute_kl_error(
                YYhat, Ypc_list, fval, fprimeval,
                wtStim * wtCell)  # fval the modeled output layer (L2/3)
            u0, u1, u2, u3 = compute_us(W, fval, fprimeval)
            #u0 = u_fn(XX,fval,W0x,W0y,k,kappa,T)
            #u1 = u_fn(XX,fval,W1x,W1y,k,kappa,T)
            #u2 = u_fn(XXp,fprimeval,W2x,W2y,k,kappa,T)
            #u3 = u_fn(XXp,fprimeval,W3x,W3y,k,kappa,T)
            Etaterm = compute_sq_error(
                Eta, u0 + u2,
                wtStim * wtCell)  # magnitude of fudge factor in mean input
            Xiterm = compute_sq_error(
                Xi, u1 + u3, wtStim *
                wtCell)  # magnitude of fudge factor in input variability
            # returns value float
            Optoterm = compute_opto_error_with_inv(
                W, wtStimOpto * wtCellOpto)  #testing out 8/20/20
            cost = wtX * Xterm + wtY * Yterm + wtEta * Etaterm + wtXi * Xiterm + wtOpto * Optoterm
            if constrain_isn:
                ISNterm = compute_isn_error(W)
                cost = cost + wtISN * ISNterm

            if tv:
                TVterm = compute_tv_error(W)
                cost = cost + wtTV * TVterm

            if isinstance(Xterm, float):
                print('X:%f' % (wtX * Xterm))
                print('Y:%f' % (wtY * Yterm))
                print('Eta:%f' % (wtEta * Etaterm))
                print('Xi:%f' % (wtXi * Xiterm))
                print('Opto:%f' % (wtOpto * Optoterm))
                if constrain_isn:
                    print('ISN:%f' % (wtISN * ISNterm))
                if tv:
                    print('TV:%f' % (wtTV * TVterm))

            lbls = ['cost']
            vars = [cost]
            for lbl, var in zip(lbls, vars):
                print_labeled(lbl, var)
            return cost

        def minusdLdW(W):
            # returns value (R,)
            # sum in first dimension: (N,1) times (N,1) times (N,P)
            #             return jacobian(minusLW)(W)
            return grad(minusLW)(W)

        def fix_violations(w, bounds):
            lb = np.array([b[0] for b in bounds])
            ub = np.array([b[1] for b in bounds])
            #print('w shape: '+str(w.shape))
            #print('bd shape: '+str(lb.shape))
            lb_violation = w < lb
            ub_violation = w > ub
            w[lb_violation] = lb[lb_violation]
            w[ub_violation] = ub[ub_violation]
            return w, lb_violation, ub_violation

        def sorted_r_eigs(w):
            drW, prW = np.linalg.eig(w)
            srtinds = np.argsort(drW)
            return drW[srtinds], prW[:, srtinds]

        def compute_eig_penalty_(Wmy, K0, kappa, T0):
            # still need to finish! Hopefully won't need
            # need to fix this to reflect addition of kappa argument
            Wsquig = gen_Weight(Wmy, K0, kappa, T0)
            drW, prW = sorted_r_eigs(Wsquig - np.eye(nQ * nS * nT))
            plW = np.linalg.inv(prW)
            eig_outer_all = [
                np.real(np.outer(plW[:, k], prW[k, :]))
                for k in range(nS * nQ * nT)
            ]
            eig_penalty_size_all = [
                barrier_wt / np.abs(np.real(drW[k]))
                for k in range(nS * nQ * nT)
            ]
            eig_penalty_dir_w = [
                eig_penalty_size *
                ((eig_outer[:nQ, :nQ] + eig_outer[nQ:, nQ:]) +
                 K0[np.newaxis, :] *
                 (eig_outer[:nQ, nQ:] + kappa * eig_outer[nQ:, :nQ]))
                for eig_outer, eig_penalty_size in zip(eig_outer_all,
                                                       eig_penalty_size_all)
            ]
            eig_penalty_dir_k = [
                eig_penalty_size *
                ((eig_outer[:nQ, nQ:] + eig_outer[nQ:, :nQ] * kappa) *
                 W0my).sum(0) for eig_outer, eig_penalty_size in zip(
                     eig_outer_all, eig_penalty_size_all)
            ]
            eig_penalty_dir_kappa = [
                eig_penalty_size *
                (eig_outer[nQ:, :nQ] * k0[np.newaxis, :] * W0my).sum().reshape(
                    (1, )) for eig_outer, eig_penalty_size in zip(
                        eig_outer_all, eig_penalty_size_all)
            ]
            eig_penalty_dir_w = np.array(eig_penalty_dir_w).sum(0)
            eig_penalty_dir_k = np.array(eig_penalty_dir_k).sum(0)
            eig_penalty_dir_kappa = np.array(eig_penalty_dir_kappa).sum(0)
            return eig_penalty_dir_w, eig_penalty_dir_k, eig_penalty_dir_kappa

        def compute_eig_penalty(W):
            # still need to finish! Hopefully won't need
            W0x, W0y, W1x, W1y, W2x, W2y, W3x, W3y, s02, k0, k1, k2, k3, kappa, T0, T1, T2, T3, XX, XXp, Eta, Xi, h = parse_W(
                W)
            eig_penalty_dir_w, eig_penalty_dir_k, eig_penalty_dir_kappa = compute_eig_penalty_(
                W0my, k0, kappa0)
            eig_penalty_W = unparse_W(np.zeros_like(W0mx), eig_penalty_dir_w,
                                      np.zeros_like(W0sx), np.zeros_like(W0sy),
                                      np.zeros_like(s020),
                                      eig_penalty_dir_k, eig_penalty_dir_kappa,
                                      np.zeros_like(XX0), np.zeros_like(XXp0),
                                      np.zeros_like(Eta0), np.zeros_like(Xi0))
            #             assert(True==False)
            return eig_penalty_W

        allhot = np.zeros(W0.shape)
        #allhot[:nP*nQ+nQ**2] = 1
        allhot[:4 * (nP * nQ + nQ**2)] = 1  # penalizing all Wn equally
        W_l2_reg = lambda W: np.sum((W * allhot)**2)
        f = lambda W: minusLW(W) + l2_penalty * W_l2_reg(W)
        fprime = lambda W: minusdLdW(W) + 2 * l2_penalty * W * allhot

        fix_violations(W0, bounds)

        W1, loss, result = sop.fmin_l_bfgs_b(f,
                                             W0,
                                             fprime=fprime,
                                             bounds=bounds,
                                             factr=1e4,
                                             maxiter=int(1e3))
        if compute_hessian:
            gr = grad(minusLW)(W1)
            hess = hessian(minusLW)(W1)
        else:
            gr = None
            hess = None

#         W0mx,W0my,W0sx,W0sy,s020,k0,kappa0,XX0,XXp0,Eta0,Xi0 = parse_W(W1)
#W0x,W0y,W1x,W1y,W2x,W2y,W3x,W3y,s02,k,kappa,T,XX,XXp,Eta,Xi,h = parse_W(W)

        return W1, loss, gr, hess, result
        #Reset trace
        if done == 0:
            z = np.zeros((task.nactions, task.nstates))
        # Update trace
        z = np.outer(u, x) + w[2] * w[3] * z
        # Compute RPE
        rpe = r + w[2] * np.einsum('i,ij,j->', u_, Q, x_) - np.einsum(
            'i,ij,j->', u, Q, x)
        # Update value function
        Q += w[0] * rpe * z
    return L


# Compare with autograd and the SARSASoftmax object.
print(' AUTOGRAD  \n\n ')
agH = hessian(f)(w)
print(agH)

print('\n\n FITR (RAW)  \n\n ')
print(hess_)

print('\n\n FITR (OBJECT) \n\n')
print(agent_inv.hess_)

agh = hessian(fQ)(w)
print(' AUTOGRAD  \n\n ')
print(' Learning rate \n')
print(agh[:, :, 0, 0])

print('\n\n Discount \n')
print(agh[:, :, 2, 2])
Example #10
    w2 = w + eps * v
    negL1, _, _ = nll_GLM_GanmorCalciumAR1(w1, Xmat, Yobs, hyperparams, nlfun)
    negL2, _, _ = nll_GLM_GanmorCalciumAR1(w2, Xmat, Yobs, hyperparams, nlfun)
    gradient_finite_diff[i] = (negL2 - negL1) / (2.0 * eps)

    # if want finite difference computation of Hessian, uncomment this code
    # note this is redundant because it computes both upper and lower triangular elements
    # for j, v2 in enumerate(np.eye(D)):
    #     wp = w + eps * v + eps * v2
    #     wm1 = w + eps * v
    #     wm2 = w + eps * v2
    #     negLp, _, _ = nll_GLM_GanmorCalciumAR1(wp, Xmat, Yobs, hyperparams, nlfun)
    #     negLm1, _, _ = nll_GLM_GanmorCalciumAR1(wm1, Xmat, Yobs, hyperparams, nlfun)
    #     negLm2, _, _ = nll_GLM_GanmorCalciumAR1(wm2, Xmat, Yobs, hyperparams, nlfun)
    #     hess_finite_diff[i,j] = (negLp - negLm1 - negLm2 + negL) / (eps**2)
print("Done.")

# autograd
grad_w = grad(
    lambda w: nll_GLM_GanmorCalciumAR1(w, Xmat, Yobs, hyperparams, nlfun)[0])
gradient_autograd = grad_w(w)
hess_w = hessian(
    lambda w: nll_GLM_GanmorCalciumAR1(w, Xmat, Yobs, hyperparams, nlfun)[0])
H_autograd = hess_w(w)

# diffs
print("Gradient vs. Autograd    : ",
      np.linalg.norm(gradient_autograd - gradient))
print("Gradient vs. finite diffs: ",
      np.linalg.norm(gradient_finite_diff - gradient))
print("Hessian  vs. Autograd    : ", np.linalg.norm(H - H_autograd))
Example #11
 def hessian_log_likelihood(self, params: np.ndarray, x: np.ndarray,
                            idx_params: np.ndarray):
     return hessian(self.log_likelihood, argnum=0)(params, x, idx_params)
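
# Editorial note (not from the original): argnum=0 tells autograd to differentiate
# with respect to the first argument (params) only, so the returned Hessian has
# shape (len(params), len(params)) and treats x and idx_params as constants.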
Example #12
def part2(target, link_length, min_roll, max_roll, min_pitch, max_pitch, min_yaw, max_yaw, obstacles):
    """Function that uses optimization to do inverse kinematics for a snake robot

    Args:
        target:  [x, y, z, q0, q1, q2, q3]' position and orientation of the end effector
        link_length:  Nx1 vectors of the lengths of the links
        min_xxx, max_xxx are the vectors of the limits on the roll, pitch, yaw of each link.
        obstacles: An Mx4 matrix where each row is [x, y, z, radius] of one of the
        M sphere obstacles.

    Returns:
        r: N vector of roll
        p: N vector of pitch
        y: N vector of yaw

    """
    N = len(link_length)
    def func(x0):
        pos = np.array([0,0,0])
        qM = np.eye(3)
        rs = x0[:N]
        ps = x0[N:2*N]
        ys = x0[2*N:]
        ll = link_length
        for r,p,y,l in zip(rs,ps,ys,ll):
            pos,qM = fwd(pos,l,r,p,y,qM)
        t = np.array(target)
        C = 1 # meters and radians. Close enough
        extra = 0.0
        for ob in obstacles:
            p0 = np.array([0,0,0])
            q = np.eye(3)
            for r,p,y,l in zip(rs,ps,ys,ll):
                p1,q = fwd(p0,l,r,p,y,q)
                i1,i2 = sphere_line_intersection(p0,p1,ob[:3],ob[3])
                p0 = p1
                if i1 is not None:
                    i1 = np.array(i1)
                    extra += (((ob[:3]-i1)**2-ob[3])**2).sum()
                if i2 is not None:
                    i2 = np.array(i2)
                    extra += (((ob[:3]-i2)**2-ob[3])**2).sum()
        quat = transforms3d.quaternions.mat2quat(qM)
        rot_error = 1.0 - ((quat*np.array([t[3],-t[4],-t[5],-t[6]]))**2 ).sum()
        return ((pos[:3]-t[:3])**2).sum() + C*rot_error + extra

    bounds =          [(x,y) for x,y in zip(min_roll, max_roll)]
    bounds = bounds + [(x,y) for x,y in zip(min_pitch,max_pitch)]
    bounds = bounds + [(x,y) for x,y in zip(min_yaw,  max_yaw)]
    
    midpoint = lambda mn,mx: mn+0.5*(mx-mn)
    x0 = [midpoint(min_roll[i],max_roll[i]) for i in range(N)] + [midpoint(min_pitch[i],max_pitch[i]) for i in range(N)] + [midpoint(min_yaw[i],  max_yaw[i]) for i in range(N)] 
    x0 = np.array(x0) + 1e-6
    jac = grad(func)
    hess = hessian(func)
    def jac_reg(x):
        j = jac(x)
        if np.isfinite(j).all():
            return j
        else:
            return opt.approx_fprime(x0,func,1e-6)
    print(jac(x0))
    if False:     # quat should be norm 1 ?
        eps = 1e-3
        constraints =               [{'type': 'eq', 'fun': lambda x: (x[3]**2 + x[4]**2 + x[5]**2 + x[6]**2) > 1.0-eps }]
        constraints = constraints + [{'type': 'eq', 'fun': lambda x: (x[3]**2 + x[4]**2 + x[5]**2 + x[6]**2) < 1.0+eps }]
    else:
        constraints = []

    for ob in obstacles:
        pass #soft for now?

    # I think only method='SLSQP' is good?
    # L-BFGS-B, TNC and SLSQP
    # Powell, SLSQP, COBYLA
    if False:
        import cma
        es = cma.CMAEvolutionStrategy(x0, pi/2.0, {'bounds':list(zip(*bounds))})
        es.optimize(func)
        print(es.result_pretty())
        resx = es.result.xbest
    else:
        res = opt.minimize(func,x0=x0,bounds=bounds,constraints=constraints,method='Powell',jac=jac_reg)
        print(res)
        resx = res.x
    return resx[:N], resx[N:2*N], resx[2*N:]
Example #13
        raise ValueError('Upper bound must be greater than lower bound')
    if ub == float("inf"):
        if lb == -float("inf"):
            # TODO: I'm not sure this copy works with autodiff.
            return copy.copy(free_vec)
        else:
            return np.exp(free_vec) + lb
    else: # the upper bound is finite
        if lb == -float("inf"):
            return ub - np.exp(-1 * free_vec)
        else:
            exp_vec = np.exp(free_vec)
            return (ub - lb) * exp_vec / (1 + exp_vec) + lb

constrain_scalar_jac = autograd.jacobian(constrain)
constrain_scalar_hess = autograd.hessian(constrain)

def get_inbounds_value(lb, ub):
    assert lb < ub
    if lb > -float('inf') and ub < float('inf'):
        return 0.5 * (lb + ub)
    else:
        if lb > -float('inf'):
            # The upper bound is infinite.
            return lb + 1.0
        elif ub < float('inf'):
            # The lower bound is infinite.
            return ub - 1.0
        else:
            # Both are infinite.
            return 0.0
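
# Editorial worked example (hedged; assumes constrain takes (free_vec, lb, ub), as
# the branches above suggest): with lb = 1, ub = 3 and free_vec = 0 the finite-box
# branch gives (3 - 1) * exp(0) / (1 + exp(0)) + 1 = 2.0, the midpoint of the box,
# and large positive / negative free values approach ub / lb respectively.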
Example #14

#scale value function
def square_loss(pred, y):
    return 0.5 * (y - pred)**2


def logistic_loss(pred, y):
    return -(y * anp.log(pred) + (1 - y) * anp.log(1 - pred))


# square_loss_grad(pred,y)
# square_loss_hess(pred,y)
square_loss_grad = grad(
    square_loss)  # square_loss with respect to  pred, grad = pred-y
square_loss_hess = hessian(
    square_loss)  # square_loss_grad with respect to pred, hess = 1

print(square_loss_grad(0.0, 0.5))  # -0.5
print(square_loss_hess(0.0, 0.5))  # 1

# logistic_loss_grad(pred,y)
# logistic_loss_hess(pred,y)
logistic_loss_grad = grad(
    logistic_loss
)  # logistic_loss with respect to  pred, grad = (1-y)/(1-pred) - y/pred
logistic_loss_hess = hessian(
    logistic_loss
)  #logistic_loss_grad with respect to  pred, hess = y/pred**2 + (1-y)/(1-pred)**2

print(logistic_loss_grad(0.2, 0))  # 1/(1-0.2) = 1.25
print(logistic_loss_hess(0.2, 0))  # 1/(1-0.2)**2 = 1.5625
Example #15
def newtons_method(g, max_its, w, **kwargs):
    # flatten the input function, in case it takes in matrices of weights
    flat_g, unflatten, w = flatten_func(g, w)

    # compute the gradient / hessian functions of our input function -
    # note these are themselves functions.  In particular the gradient,
    # when evaluated, returns both the gradient and the function evaluation (remember,
    # as discussed in Chapter 3, we always get the function evaluation 'for free' when we use
    # an Automatic Differentiator to evaluate the gradient)
    gradient = value_and_grad(flat_g)
    hess = hessian(flat_g)

    # set numerical stability parameter / regularization parameter
    epsilon = 10**(-7)
    if 'epsilon' in kwargs:
        epsilon = kwargs['epsilon']

    # run the newtons method loop
    weight_history = []  # container for weight history
    cost_history = []  # container for corresponding cost function history
    for k in range(max_its):
        # evaluate the gradient, store current weights and cost function value
        cost_eval, grad_eval = gradient(w)
        weight_history.append(unflatten(w))
        cost_history.append(cost_eval)

        # evaluate the hessian
        hess_eval = hess(w)

        # reshape for numpy linalg functionality
        hess_eval.shape = (int(
            (np.size(hess_eval))**(0.5)), int((np.size(hess_eval))**(0.5)))

        # solve the second-order system for the weight update
        w = w - np.dot(
            np.linalg.pinv(hess_eval + epsilon * np.eye(np.size(w))),
            grad_eval)

    # collect final weights
    weight_history.append(unflatten(w))
    # compute final cost function value via g itself (since we aren't computing
    # the gradient at the final step, we don't get the final cost function value
    # via the Automatic Differentiator)
    cost_history.append(flat_g(w))
    return weight_history, cost_history

# gradient descent function - inputs: g (input function), alpha_choice (steplength rule), max_its (maximum number of iterations), w (initialization)
def gradient_descent(g, alpha_choice, max_its, w):
    # compute the gradient function of our input function - note this is a function too
    # that - when evaluated - returns both the gradient and the function evaluation (remember,
    # as discussed in Chapter 3, we always get the function evaluation 'for free' when we use
    # an Automatic Differentiator to evaluate the gradient)
    gradient = value_and_grad(g)

    # run the gradient descent loop
    weight_history = []  # container for weight history
    cost_history = []  # container for corresponding cost function history
    alpha = 0
    for k in range(1, max_its + 1):
        # check if diminishing steplength rule used
        if alpha_choice == 'diminishing':
            alpha = 1 / float(k)
        else:
            alpha = alpha_choice

        # evaluate the gradient, store current weights and cost function value
        cost_eval, grad_eval = gradient(w)
        weight_history.append(w)
        cost_history.append(cost_eval)

        # take gradient descent step
        w = w - alpha * grad_eval

    # collect final weights
    weight_history.append(w)
    # compute final cost function value via g itself (since we aren't computing
    # the gradient at the final step, we don't get the final cost function value
    # via the Automatic Differentiator)
    cost_history.append(g(w))
    return weight_history, cost_history
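
# Editorial usage sketch (not part of the original; assumes the same imports the
# routines above rely on, i.e. autograd.numpy as np, value_and_grad, hessian and
# flatten_func):
# w_hist, c_hist = newtons_method(lambda w: np.sum(w**2), max_its=5, w=np.array([1.0, -2.0]))
# On a quadratic the cost history should drop to (essentially) zero after a single
# Newton step, since the local quadratic model is exact.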
Example #16
    def run(self, theta, niter=10, tol=.0001, verbose=False, path=""):
        """ runs NPV for ... iterations 
            mimics npv_run.m from Sam Gershman's original matlab code

            USAGE: [F mu s2] = npv_run(nlogpdf,theta,[nIter])

            INPUTS:
             theta - [N x D+1] initial parameter settings, where
                        N is the number of components,
                        D is the number of latent variables in the model,
                      and the last column contains the log bandwidths (variances)
              nIter (optional) - maximum number of iterations (default: 10)
              tol (optional) - change in the evidence lower bound (ELBO) for
              convergence (default: 0.0001)

            OUTPUTS:
              F - [nIter x 1] approximate ELBO value at each iteration
              mu - [N x D] component means
              s2 - [N x 1] component bandwidths
        """
        N, Dpp = theta.shape
        D = Dpp - 1

        # set LBFGS optim arguments
        disp = 10 if verbose else None
        opts = {
            'disp': disp,
            'maxiter': 5000,
            'gtol': 1e-7,
            'ftol': 1e-7
        }  #, 'factr':1e2}
        elbo_vals = np.zeros(niter)

        timestamps = []
        timestamps.append(time())
        for ii in range(niter):
            elbo_vals[ii] = self.mc_elbo(theta)
            print("iteration %d (elbo = %2.4f)" % (ii, elbo_vals[ii]))

            # first-order approximation (L1): optimize mu, one component at a time
            print " ... optimizing mus "
            for n in xrange(N):
                print " ... %d / %d " % (n, N)
                fun, gfun = self.make_elbo1_funs(theta, n)
                res = minimize(fun,
                               x0=theta[n, :D],
                               jac=gfun,
                               method='L-BFGS-B',
                               options=opts)
                theta[n, :D] = res.x

            #print theta[:,:D]
            #print " ... elbo: ", self.mc_elbo(theta)

            # second-order approximation (L2): optimize s2
            print " ... optimizing sigmas"
            mu = theta[:, :D]
            h = np.zeros(N)
            for n in range(N):
                # compute Hessian trace using finite differencing or autograd
                h[n] = np.sum(np.diag(hessian(self.lnpdf)(mu[n])))

            fun, gfun = self.make_elbo2_funs(theta, h)
            res = minimize(fun,
                           x0=theta[:, -1],
                           jac=gfun,
                           method='L-BFGS-B',
                           options=opts)
            theta = np.column_stack([mu, res.x])

            #  mmd_samples = mogsamples(2000, theta)
            if (ii % 5 == 0):
                timestamps.append(time())
                np.savez(path + '/iter' + str(ii) + "of" + str(niter) + ".npz",
                         timestamps=timestamps,
                         mu=mu,
                         sigma=np.exp(theta[:, -1]) + self.s2min,
                         n_feval=self.lnpdf.counter)

            # calculate the approximate ELBO (L2)
            #if (ii > 1) and (np.abs(elbo_vals[ii] - elbo_vals[ii-1] < tol))
            # TODO check for convergence
            #if (ii > 1) and (np.abs(F[ii]-F[ii-1]) < tol)
            #    break # end % check for convergence

        # unpack params and return
        mu = theta[:, :D]
        s2 = np.exp(theta[:, -1]) + self.s2min
        return mu, s2, elbo_vals, theta
Example #17
    def test_objective(self):
        model = Model(dim=3)
        objective = obj_lib.Objective(par=model.x, fun=model.f)

        model.set_inits()
        x_free = model.x.get_free()
        x_vec = model.x.get_vector()

        model.set_opt()
        self.assertTrue(objective.fun_free(x_free) > 0.0)
        np_test.assert_array_almost_equal(objective.fun_free(x_free),
                                          objective.fun_vector(x_vec))

        grad = objective.fun_free_grad(x_free)
        hess = objective.fun_free_hessian(x_free)
        np_test.assert_array_almost_equal(np.matmul(hess, grad),
                                          objective.fun_free_hvp(x_free, grad))

        self.assertTrue(objective.fun_vector(x_vec) > 0.0)
        grad = objective.fun_vector_grad(x_vec)
        hess = objective.fun_vector_hessian(x_vec)
        np_test.assert_array_almost_equal(
            np.matmul(hess, grad), objective.fun_vector_hvp(x_free, grad))

        # Test Jacobians.
        vec_objective = obj_lib.Objective(par=model.x, fun=model.get_x_vec)
        vec_jac = vec_objective.fun_vector_jacobian(x_vec)
        np_test.assert_array_almost_equal(model.b_mat, vec_jac)

        free_jac = vec_objective.fun_free_jacobian(x_free)
        x_free_to_vec_jac = \
            model.x.free_to_vector_jac(x_free).todense()
        np_test.assert_array_almost_equal(
            np.matmul(model.b_mat, np.transpose(x_free_to_vec_jac)), free_jac)

        # Test the preconditioning
        preconditioner = 2.0 * np.eye(model.dim)
        preconditioner[model.dim - 1, 0] = 0.1  # Add asymmetry for testing!
        objective.preconditioner = preconditioner

        np_test.assert_array_almost_equal(
            objective.fun_free_cond(x_free),
            objective.fun_free(np.matmul(preconditioner, x_free)),
            err_msg='Conditioned function values')

        fun_free_cond_grad = autograd.grad(objective.fun_free_cond)
        grad_cond = objective.fun_free_grad_cond(x_free)
        np_test.assert_array_almost_equal(
            fun_free_cond_grad(x_free),
            grad_cond,
            err_msg='Conditioned gradient values')

        fun_free_cond_hessian = autograd.hessian(objective.fun_free_cond)
        hess_cond = objective.fun_free_hessian_cond(x_free)
        np_test.assert_array_almost_equal(fun_free_cond_hessian(x_free),
                                          hess_cond,
                                          err_msg='Conditioned Hessian values')

        fun_free_cond_hvp = autograd.hessian_vector_product(
            objective.fun_free_cond)
        np_test.assert_array_almost_equal(
            fun_free_cond_hvp(x_free, grad_cond),
            objective.fun_free_hvp_cond(x_free, grad_cond),
            err_msg='Conditioned Hessian vector product values')
Example #18
    elp += np.sum(Ez * log_likes)
    # assert np.all(np.isfinite(elp))

    return -1 * elp / scale

def hessian_neg_expected_log_joint(x, Ez, Ezzp1, scale=1):
    T, D = np.shape(x)
    x_mask = np.ones((T, D), dtype=bool)
    hessian_diag, hessian_lower_diag = latent_ddm.dynamics.hessian_expected_log_dynamics_prob(Ez, x, input, x_mask, tag)
    hessian_diag[:-1] += latent_ddm.transitions.hessian_expected_log_trans_prob(x, input, x_mask, tag, Ezzp1)
    hessian_diag += latent_ddm.emissions.hessian_log_emissions_prob(data, input, mask, tag, x)

    # The Hessian of the log probability should be *negative* definite since we are *maximizing* it.
    # hessian_diag -= 1e-8 * np.eye(D)

    # Return the scaled negative hessian, which is positive definite
    return -1 * hessian_diag / scale, -1 * hessian_lower_diag / scale

from autograd import hessian
from ssm.primitives import blocks_to_full

hess = hessian(neg_expected_log_joint)
H_autograd = hess(x, Ez, Ezzp1).reshape((T,T))

H_diag, H_lower_diag = hessian_neg_expected_log_joint(x, Ez, Ezzp1)
H = blocks_to_full(H_diag, H_lower_diag)

assert np.allclose(H,H_autograd)
print("All close: ", np.allclose(H,H_autograd,rtol=1e-8,atol=1e-8))
print("Norm difference: ",np.linalg.norm(H-H_autograd))
Example #19
    def optimize(W0,compute_hessian=False):
        
        def compute_fprime_(Eta,Xi,s02):
#             Wmx,Wmy,Wsx,Wsy,s02,k,kappa,XX,YY,Eta,Xi = parse_W(W)
#             WWx,WWy = [gen_Weight(W,k,kappa) for W in [Wx,Wy]]
            return fprime_m(Eta,Xi**2+np.concatenate([s02 for ipixel in range(nS*nT)]))*Xi

        def compute_f_(Eta,Xi,s02):
            return pop_rate_fn(Eta,Xi**2+np.concatenate([s02 for ipixel in range(nS*nT)],axis=0))
        
        def compute_f_fprime_t_(W,perturbation):
            Wmx,Wmy,Wsx,Wsy,s02,k,kappa,T,XX,XXp,Eta,Xi,h1,h2 = parse_W(W)
            fval = compute_f_(Eta,Xi,s02)
            resEta = Eta - u_fn(XX,fval,Wmx,Wmy,k,kappa,T)
            resXi  = Xi - u_fn(XX,fval,Wsx,Wsy,k,kappa,T)
            YY = fval + perturbation
            def dYYdt(YY,Eta1,Xi1):
                return -YY + compute_f_(Eta1,Xi1,s02)
            for t in range(niter):
                Eta1 = resEta + u_fn(XX,YY,Wmx,Wmy,k,kappa,T)
                Xi1 = resXi + u_fn(XX,YY,Wsx,Wsy,k,kappa,T)
                YY = YY + dt*dYYdt(YY,Eta1,Xi1)
                
            YYprime = compute_fprime_(Eta1,Xi1,s02)
            
            return YY,YYprime
        
        def compute_f_fprime_t_avg_(W,perturbation,burn_in=0.5):
            Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2 = parse_W(W)
            fval = compute_f_(Eta,Xi,s02)
            resEta = Eta - u_fn(XX,fval,Wmx,Wmy,K,kappa,T)
            resXi  = Xi - u_fn(XX,fval,Wsx,Wsy,K,kappa,T)
            YY = fval + perturbation
            YYmean = np.zeros_like(Eta)
            YYprimemean = np.zeros_like(Eta)
            def dYYdt(YY,Eta1,Xi1):
                return -YY + compute_f_(Eta1,Xi1,s02)
            for t in range(niter):
                Eta1 = resEta + u_fn(XX,YY,Wmx,Wmy,K,kappa,T)
                Xi1 = resXi + u_fn(XX,YY,Wsx,Wsy,K,kappa,T)
                YY = YY + dt*dYYdt(YY,Eta1,Xi1)
                if t>niter*burn_in:
                    YYprime = compute_fprime_(Eta1,Xi1,s02)
                    YYmean = YYmean + 1/niter/burn_in*YY
                    YYprimemean = YYprimemean + 1/niter/burn_in*YYprime
                
            return YYmean,YYprimemean

        def u_fn(XX,YY,Wx,Wy,K,kappa,T):
            WWx,WWy = [gen_Weight(W,K,kappa,T) for W in [Wx,Wy]]
            #print(WWx.shape)
            #print(WWy.shape)
            #print_labeled('WWx',WWx)
            #print_labeled('WWy',WWy)
            #plt.figure(1)
            #plt.imshow(WWy)
            #plt.savefig('WWy.jpg',dpi=300)
            return XX @ WWx + YY @ WWy
                        
        def minusLW(W):
            
            def compute_sq_error(a,b,wt):
                return np.sum(wt*(a-b)**2)
            
            def compute_kl_error(mu_data,pc_list,mu_model,fprimeval,wt):
                # how to model variability in X?
                kl = compute_kl_divergence(fprimeval,noise,mu_data,mu_model,pc_list)
                return kl #wt*kl
                # principled way would be to use 1/wt for noise term. Should add later.

            def compute_opto_error(W):
                Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2 = parse_W(W)
                WWy = gen_Weight(Wmy,K,kappa,T)
                Phi = fprime_m(Eta,Xi**2+np.concatenate([s02 for ipixel in range(nS*nT)]))
                dHH = np.zeros((nN,nQ*nS*nT))
                dHH[:,np.arange(2,nQ*nS*nT,nQ)] = 1
                dHH = dHH*h
                print('dYY: '+str(dYY.shape))
                print('Phi: '+str(Phi.shape))
                print('dHH: '+str(dHH.shape))
                cost = np.sum((dYY - (dYY @ WWy) * Phi - dHH * Phi)**2)
                return cost

            def compute_opto_error_with_inv(W):
                Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2 = parse_W(W)
                WWy = gen_Weight(Wmy,K,kappa,T)
                Phi = fprime_m(Eta,Xi**2+np.concatenate([s02 for ipixel in range(nS*nT)]))
                Phi = np.concatenate((Phi,Phi),axis=0)
                Phi1 = np.array([np.diag(phi) for phi in Phi])
                invmat = np.array([np.linalg.inv(np.eye(nQ*nS*nT) - WWy @ phi1) for phi1 in Phi1])
                dHH = np.zeros((nN,nQ*nS*nT))
                dHH[:,np.arange(2,nQ*nS*nT,nQ)] = 1
                dHH = np.concatenate((dHH*h1,dHH*h2),axis=0)
                print('dYY: '+str(dYY.shape))
                print('Phi: '+str(Phi.shape))
                print('dHH: '+str(dHH.shape))
                invprod = np.einsum('ij,ijk->ik',dHH,Phi1)
                invprod = np.einsum('ij,ijk->ik',invprod,invmat)
                #invprod = np.array([dhh @ phi1 @ lil_invmat for dhh,phi1,this_invmat in zip(dHH,Phi1,invmat)])
                cost = np.sum((dYY[opto_mask] - invprod[opto_mask])**2)
                return cost
            

            def compute_isn_error(W):
                Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2 = parse_W(W)
                Phi = fprime_m(Eta,Xi**2+np.concatenate([s02 for ipixel in range(nS*nT)]))
                #print('min Eta: %f'%np.min(Eta[:,0]))
                #print('WEE: %f'%Wmy[0,0])
                #print('min phiE*WEE: %f'%np.min(Phi[:,0]*Wmy[0,0]))
                cost = -np.sum(np.log(Phi[:,0]*Wmy[0,0]-1))
                #print('ISN cost: %f'%cost)
                return cost
            
            Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2 = parse_W(W)

            #print_labeled('T',T)
            #print_labeled('K',K)
            #print_labeled('Wmy',Wmy)
            
            perturbation = perturbation_size*np.random.randn(*Eta.shape)
            
#             fval,fprimeval = compute_f_fprime_t_(W,perturbation) # Eta the mean input per cell, Xi the stdev. input per cell, s02 the baseline variability in input
            fval,fprimeval = compute_f_fprime_t_avg_(W,perturbation) # Eta the mean input per cell, Xi the stdev. input per cell, s02 the baseline variability in input
            #print_labeled('fval',fval)
            
            Xterm = compute_kl_error(XXhat,Xpc_list,XX,XXp,wtStim*wtInp) # XX the modeled input layer (L4)
            Yterm = compute_kl_error(YYhat,Ypc_list,fval,fprimeval,wtStim*wtCell) # fval the modeled output layer (L2/3)
            Etaterm = compute_sq_error(Eta,u_fn(XX,fval,Wmx,Wmy,K,kappa,T),wtStim*wtCell) # magnitude of fudge factor in mean input
            Xiterm = compute_sq_error(Xi,u_fn(XX,fval,Wsx,Wsy,K,kappa,T),wtStim*wtCell) # magnitude of fudge factor in input variability
            # returns value float
            #Optoterm = compute_opto_error(W)
            Optoterm = compute_opto_error_with_inv(W) #testing out 8/20/20
            cost = wtX*Xterm + wtY*Yterm + wtEta*Etaterm + wtXi*Xiterm + wtOpto*Optoterm
            if constrain_isn:
                ISNterm = compute_isn_error(W)
                cost = cost + wtISN*ISNterm
                
            if isinstance(Xterm,float):
                print('X:%f'%(wtX*Xterm))
                print('Y:%f'%(wtY*Yterm))
                print('Eta:%f'%(wtEta*Etaterm))
                print('Xi:%f'%(wtXi*Xiterm))
                print('Opto:%f'%(wtOpto*Optoterm))
                if constrain_isn:
                    print('ISN:%f'%(wtISN*ISNterm))

            #lbls = ['Yterm']
            #vars = [Yterm]
            lbls = ['cost']
            vars = [cost]
            for lbl,var in zip(lbls,vars):
                print_labeled(lbl,var)
            return cost

        def minusdLdW(W): 
            # returns value (R,)
            # sum in first dimension: (N,1) times (N,1) times (N,P)
#             return jacobian(minusLW)(W)
            return grad(minusLW)(W)
        
        def fix_violations(w,bounds):
            lb = np.array([b[0] for b in bounds])
            ub = np.array([b[1] for b in bounds])
            lb_violation = w<lb
            ub_violation = w>ub
            w[lb_violation] = lb[lb_violation]
            w[ub_violation] = ub[ub_violation]
            return w,lb_violation,ub_violation
        
        def sorted_r_eigs(w):
            drW,prW = np.linalg.eig(w)
            srtinds = np.argsort(drW)
            return drW[srtinds],prW[:,srtinds]
    
        def compute_eig_penalty_(Wmy,K0,kappa,T0):
            # still need to finish! Hopefully won't need
            # need to fix this to reflect addition of kappa argument
            Wsquig = gen_Weight(Wmy,K0,kappa,T0)
            drW,prW = sorted_r_eigs(Wsquig - np.eye(nQ*nS*nT))
            plW = np.linalg.inv(prW)
            eig_outer_all = [np.real(np.outer(plW[:,k],prW[k,:])) for k in range(nS*nQ*nT)]
            eig_penalty_size_all = [barrier_wt/np.abs(np.real(drW[k])) for k in range(nS*nQ*nT)]
            eig_penalty_dir_w = [eig_penalty_size*((eig_outer[:nQ,:nQ] + eig_outer[nQ:,nQ:]) + K0[np.newaxis,:]*(eig_outer[:nQ,nQ:] + kappa*eig_outer[nQ:,:nQ])) for eig_outer,eig_penalty_size in zip(eig_outer_all,eig_penalty_size_all)]
            eig_penalty_dir_k = [eig_penalty_size*((eig_outer[:nQ,nQ:] + eig_outer[nQ:,:nQ]*kappa)*W0my).sum(0) for eig_outer,eig_penalty_size in zip(eig_outer_all,eig_penalty_size_all)]
            eig_penalty_dir_kappa = [eig_penalty_size*(eig_outer[nQ:,:nQ]*k0[np.newaxis,:]*W0my).sum().reshape((1,)) for eig_outer,eig_penalty_size in zip(eig_outer_all,eig_penalty_size_all)]
            eig_penalty_dir_w = np.array(eig_penalty_dir_w).sum(0)
            eig_penalty_dir_k = np.array(eig_penalty_dir_k).sum(0)
            eig_penalty_dir_kappa = np.array(eig_penalty_dir_kappa).sum(0)
            return eig_penalty_dir_w,eig_penalty_dir_k,eig_penalty_dir_kappa
        
        def compute_eig_penalty(W):
            # still need to finish! Hopefully won't need
            W0mx,W0my,W0sx,W0sy,s020,K0,kappa0,T0,XX0,XXp0,Eta0,Xi0,h0 = parse_W(W)
            eig_penalty_dir_w,eig_penalty_dir_k,eig_penalty_dir_kappa = compute_eig_penalty_(W0my,k0,kappa0)
            eig_penalty_W = unparse_W(np.zeros_like(W0mx),eig_penalty_dir_w,np.zeros_like(W0sx),np.zeros_like(W0sy),np.zeros_like(s020),eig_penalty_dir_k,eig_penalty_dir_kappa,np.zeros_like(XX0),np.zeros_like(XXp0),np.zeros_like(Eta0),np.zeros_like(Xi0))
#             assert(True==False)
            return eig_penalty_W

        allhot = np.zeros(W0.shape)
        allhot[:nP*nQ+nQ**2] = 1
        W_l2_reg = lambda W: np.sum((W*allhot)**2)
        f = lambda W: minusLW(W) + l2_penalty*W_l2_reg(W)
        fprime = lambda W: minusdLdW(W) + 2*l2_penalty*W*allhot

        fix_violations(W0,bounds)
        
        W1,loss,result = sop.fmin_l_bfgs_b(f,W0,fprime=fprime,bounds=bounds,factr=1e4,maxiter=int(1e3))
        if compute_hessian:
            gr = grad(minusLW)(W1)
            hess = hessian(minusLW)(W1)
        else:
            gr = None
            hess = None
        
#         W0mx,W0my,W0sx,W0sy,s020,k0,kappa0,XX0,XXp0,Eta0,Xi0 = parse_W(W1)
        
        return W1,loss,gr,hess,result
Example #20
def test_laplace_em_hessian(N=5, K=3, D=2, T=20):
    for transitions in ["standard", "recurrent", "recurrent_only"]:
        for emissions in ["gaussian_orthog", "gaussian"]:
            print("Checking analytical hessian for transitions={},  "
                  "and emissions={}".format(transitions, emissions))
            slds = ssm.SLDS(N,
                            K,
                            D,
                            transitions=transitions,
                            dynamics="gaussian",
                            emissions=emissions)
            z, x, y = slds.sample(T)
            new_slds = ssm.SLDS(N,
                                K,
                                D,
                                transitions="standard",
                                dynamics="gaussian",
                                emissions=emissions)

            inputs = [np.zeros((T, 0))]
            masks = [np.ones_like(y)]
            tags = [None]
            method = "laplace_em"
            datas = [y]
            num_samples = 1

            def neg_expected_log_joint_wrapper(x_vec, T, D):
                x = x_vec.reshape(T, D)
                return new_slds._laplace_neg_expected_log_joint(
                    datas[0], inputs[0], masks[0], tags[0], x, Ez, Ezzp1)

            variational_posterior = new_slds._make_variational_posterior(
                "structured_meanfield", datas, inputs, masks, tags, method)
            new_slds._fit_laplace_em_discrete_state_update(
                variational_posterior, datas, inputs, masks, tags, num_samples)
            Ez, Ezzp1, _ = variational_posterior.discrete_expectations[0]

            x = variational_posterior.mean_continuous_states[0]
            scale = x.size
            J_diag, J_lower_diag = new_slds._laplace_hessian_neg_expected_log_joint(
                datas[0], inputs[0], masks[0], tags[0], x, Ez, Ezzp1)
            dense_hessian = scipy.linalg.block_diag(*[x for x in J_diag])
            dense_hessian[D:, :-D] += scipy.linalg.block_diag(
                *[x for x in J_lower_diag])
            dense_hessian[:-D, D:] += scipy.linalg.block_diag(
                *[x.T for x in J_lower_diag])

            true_hess = hessian(neg_expected_log_joint_wrapper)(x.reshape(-1),
                                                                T, D)
            assert np.allclose(true_hess, dense_hessian)
            print("Hessian passed.")

            # Also check that computation of H works.
            h_dense = dense_hessian @ x.reshape(-1)
            h_dense = h_dense.reshape(T, D)

            J_ini, J_dyn_11, J_dyn_21, J_dyn_22, J_obs = new_slds._laplace_neg_hessian_params(
                datas[0], inputs[0], masks[0], tags[0], x, Ez, Ezzp1)
            h_ini, h_dyn_1, h_dyn_2, h_obs = new_slds._laplace_neg_hessian_params_to_hs(
                x, J_ini, J_dyn_11, J_dyn_21, J_dyn_22, J_obs)

            h = h_obs.copy()
            h[0] += h_ini
            h[:-1] += h_dyn_1
            h[1:] += h_dyn_2

            assert np.allclose(h, h_dense)
Example #21
def test_hessian_vector_product():
    fun = lambda a: np.sum(np.sin(a))
    a = npr.randn(5)
    v = npr.randn(5)
    H = hessian(fun)(a)
    check_equivalent(np.dot(H, v), hessian_vector_product(fun)(a, v))
Example #22
        return -np.inf

    return lp + log_likelihood(th)


def nll(th):
    return -log_likelihood(th)


def nlpost(th):
    return -log_probability(th)


dnll = jacobian(nll)
dnlpost = jacobian(nlpost)
ddnlpost = hessian(nlpost)

time_bnds = [1e-5, pd['nt']]
ang_bnds = [0, 2 * np.pi]
bounds_dict = {
    'tau': [10, 500],
    'diff': [1e-5, 1e-1],
    'xpos': [-0.5, 0.5],
    'ypos': [-0.5, 0.5],
    'a0': [-10, 10],
    'a3': ang_bnds,
    'b0': [-50, 50],
    'b1': time_bnds,
    'b2': time_bnds,
    'b3': ang_bnds,
    'c0': [-100, 100],
Example #23
 def ggnvp_maker(x):
     J = jacobian(f)(x)
     H = hessian(g)(f(x))
     def ggnvp(v):
         return np.dot(J.T, np.dot(H, np.dot(J, v)))
     return ggnvp
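
# Editorial note (not from the original): ggnvp computes v -> J.T @ (H @ (J @ v)),
# the generalized Gauss-Newton product for the composition g(f(x)). When H, the
# Hessian of g at f(x), is positive semidefinite, J.T @ H @ J is a common PSD
# surrogate for the full Hessian of g o f.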
Example #24
    def compute_dParams_dWeights(self,
                                 some_example_weights,
                                 solver_method='cholesky',
                                 non_fixed_dims=None,
                                 rank=-1,
                                 **kwargs):
        '''
        Sets self.dParams_dWeights = dParams_dx_n for each datapoint x_n.
        rank = -1 uses a full-rank matrix solve (i.e. np.linalg.solve on the full
        Hessian). A positive integer uses a low-rank approximation in
        inverse_hessian_vector_product.
        '''
        if non_fixed_dims is None:
            non_fixed_dims = np.arange(self.params.get_free().shape[0])
        if len(non_fixed_dims) == 0:
            self.dParams_dWeights = np.zeros(
                (0, some_example_weights.shape[0]))
            return

        dObj_dParams = autograd.jacobian(self.weighted_model_objective,
                                         argnum=1)
        d2Obj_dParams2 = autograd.jacobian(dObj_dParams, argnum=1)
        d2Obj_dParamsdWeights = autograd.jacobian(dObj_dParams, argnum=0)

        # Have to re-copy this into self.params after every autograd call, as
        #  autograd turns self.params.get_free() into an ArrayBox (whereas we want
        #  it to be a numpy array)
        #array_box_go_away = self.params.get_free().copy()
        #cur_weights = self.example_weights.copy()

        start = time.time()
        grads = self.compute_gradients(some_example_weights)
        X = self.training_data.X

        if solver_method == 'cholesky':
            eval_reg_hess = autograd.hessian(self.regularization)
            tmp = self.params.get_free().copy()
            reg_hess = eval_reg_hess(self.params.get_free())
            reg_hess[-1, :] = 0.0
            reg_hess[:, -1] = 0.0
            self.params.set_free(tmp)
            self.dParams_dWeights = -solvers.ihvp_cholesky(
                grads, X, self.D2, regularizer_hessian=reg_hess)
        elif solver_method == 'agarwal':
            eval_reg_hess = autograd.hessian(self.regularization)
            tmp = self.params.get_free().copy()
            reg_hess = eval_reg_hess(self.params.get_free())
            reg_hess[-1, :] = 0.0
            reg_hess[:, -1] = 0.0
            self.params.set_free(tmp)
            self.dParams_dWeights = -solvers.ihvp_agarwal(
                grads, X, self.D2, regularizer_hessian=reg_hess, **kwargs)
        elif solver_method == 'lanczos':
            print('NOTE lanczos currently assumes l2 regularization')
            self.dParams_dWeights = -solvers.ihvp_exactEvecs(
                grads, X, self.D2, rank=rank, L2Lambda=self.L2Lambda)
        elif solver_method == 'tropp':
            print('NOTE tropp currently assumes l2 regularization')
            self.dParams_dWeights = -solvers.ihvp_tropp(
                grads, X, self.D2, L2Lambda=self.L2Lambda, rank=rank)

        #self.params.set_free(array_box_go_away)
        #self.example_weights = cur_weights
        self.non_fixed_dims = non_fixed_dims
Example #25
def test_hessian_matrix_product():
    fun = lambda a: np.sum(np.sin(a))
    a = npr.randn(5, 4)
    V = npr.randn(5, 4)
    H = hessian(fun)(a)
    check_equivalent(np.tensordot(H, V), hessian_vector_product(fun)(a, V))
Example #26
    # We have double counted the diagonal.  For some reason the autograd
    # diagonal functions require axis1=-1 and axis2=-2
    mat_val = mat_val - \
        np.make_diagonal(np.diagonal(ld_mat, axis1=-1, axis2=-2),
                         axis1=-1, axis2=-2)
    return mat_val


def pos_def_matrix_free_to_vector(free_val, diag_lb=0.0):
    mat_val = unpack_posdef_matrix(free_val, diag_lb=diag_lb)
    return vectorize_ld_matrix(mat_val)

pos_def_matrix_free_to_vector_jac = \
    autograd.jacobian(pos_def_matrix_free_to_vector)
pos_def_matrix_free_to_vector_hess = \
    autograd.hessian(pos_def_matrix_free_to_vector)

class PosDefMatrixParam(object):
    def __init__(self, name='', size=2, diag_lb=0.0, val=None):
        self.name = name
        self.__size = int(size)
        self.__vec_size = int(size * (size + 1) / 2)
        self.__diag_lb = diag_lb
        assert diag_lb >= 0
        if val is None:
            self.__val = np.diag(np.full(self.__size, diag_lb + 1.0))
        else:
            self.set(val)

        # These will be dense, so just use autograd directly.
        self.free_to_vector_jac_dense = autograd.jacobian(self.free_to_vector)
Example #27
 def hess(x, g): return np.tensordot(ad.hessian(objective)(x),
                                     g, axes=x.ndim)
 return hess
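
# Editorial note (not from the original): with axes=x.ndim, np.tensordot contracts
# all trailing dimensions of the Hessian (which has shape x.shape + x.shape) against
# g, so hess(x, g) returns the Hessian applied to g, with the same shape as x.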
Example #28
def survival_fit_weights(censored_inputs, noncensored_inputs, C, maxiter):
    """
    Minimize the negative log-likelihood to get gamma, the covariate weight vector. See Eq 19 in the paper.

    Parameters:

    ----------------------------

    censored_inputs: An array consisting of censored inputs of the form [time, prob, covariates].
    E.g. [[1, 0.5, [1.4,2.3,5.2]],...].

    noncensored_inputs: Same as above except these represent the noncensored rows. The number of covariates is the same as above, but we
    could have a different number of samples.

    C: Positive float giving the strength of the L^2 regularization.

    maxiter: Maximum number of iterations for the numerical solver.

    Returns:

    ---------------------------------

    Weights: [scaling, shape, gamma], a flat array where gamma is the covariate weight vector.
    """
    n_cens = len(censored_inputs)
    n_noncens = len(noncensored_inputs)
    n_rows = n_cens + n_noncens

    def training_loss(flatparam):

        arr = flatparam[2:]  # gamma

        param = [flatparam[0], flatparam[1], arr]  # [scaling, shape, gamma]

        # Training loss is the negative log-likelihood.

        known_loss = np.log(
            np.array(mod_prob_density(noncensored_inputs,
                                      param)))  # noncensored loss term
        unknown_loss = np.log(
            np.array(mod_overall_survival(censored_inputs,
                                          param)))  # censored loss term
        reg = np.dot(np.array(arr), np.array(arr))

        return C * reg - 1 / n_rows * (np.sum(known_loss) +
                                       np.sum(unknown_loss))

    training_gradient = grad(training_loss)

    hess = hessian(training_loss)

    length = len((censored_inputs[0])[2]) + 2

    b = (0.001, None
         )  # Make sure that both the shape and scaling parameters are positive.

    bnds = (b, b) + tuple(
        (None, None) for x in range(length - 2)
    )  # The covariate vector components do not need to be positive.

    guess = np.random.uniform(low=0.1, high=0.9, size=length)

    res = minimize(
        training_loss,
        guess,
        method="SLSQP",
        jac=training_gradient,
        bounds=bnds,
        options={"maxiter": maxiter},
    )

    model_weights = res.x

    log_likelihood = (-n_rows) * training_loss(model_weights)

    observed_information_matrix = n_rows * hess(model_weights)

    stand_errors = np.sqrt(inv(observed_information_matrix).diagonal())

    return model_weights, stand_errors, log_likelihood
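
# Editorial note (hedged): the standard errors above follow the usual asymptotic
# recipe se_j = sqrt([I(theta_hat)^-1]_jj), where the observed information
# I(theta_hat) is approximated by n_rows times the Hessian of the per-row-averaged
# training loss at the fitted weights; note that training_loss also includes the
# L^2 penalty term C * reg.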