def main():
    (parser, loss) = KLD(50)
    print(parser)
    print(parser.idxs_and_shapes)
    datum = {}
    datum['mu1']=np.zeros(50)
    datum['mu2']=np.ones(50)
    datum['sig1']=5
    datum['sig2']=6

    trial_vecs = []
    for _ in range(5):
        trial_vecs.append(np.random.rand(50))
    value_and_grad_fun = value_and_grad(pairwise_distance)
    value, grad = value_and_grad_fun(trial_vecs)
    

    print(trial_vecs)

    weights = parser.stack(datum)
    value_and_grad_fun = value_and_grad(loss)
    value, grad = value_and_grad_fun(weights)
    print(value)
    weights = weights - 10e-4*grad
    value, grad = value_and_grad_fun(weights)
    print(value)


def opt_traj(func, fdict, T, opt_method = 'SGD', init = None, \
    learning_rate = 0.1, seed = 100, momentum = False, noise_level = 0.0):
    # do optimization and return the trajectory
    params = {'x': 0.0, 'y': 0.0}
    domain = fdict['domain']
    optimum = fdict['optimum']
    loss_and_grad = value_and_grad(func)
    #quick_grad_check(func, params)   
    params = init_params(params, domain, init, seed)
    check_grads(func, params)
    opt_server = Parameter_Server(opt_method, momentum)
    opt_server.init_gradient_storage(params)
    
    x_traj = []
    y_traj = []
    f_traj = []
    
    print('optimising function using %s...' % opt_method)
    for t in range(T):
        (func_value, func_grad) = loss_and_grad(params)
        x_traj.append(params['x'])
        y_traj.append(params['y'])
        f_traj.append(func_value)
        func_grad = inject_noise(func_grad, noise_level)
        if opt_method == 'SGD':
            norm = np.sqrt(func_grad['x'] ** 2 + func_grad['y'] ** 2)
            if norm >= 2.0:
                func_grad['x'] /= norm / 2
                func_grad['y'] /= norm / 2
        params = opt_server.update(params, func_grad, learning_rate)

    return np.array(x_traj), np.array(y_traj), np.array(f_traj)
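Both functions above follow the same core autograd pattern: wrap a scalar loss with value_and_grad, then repeatedly evaluate it and step against the returned gradient. A minimal, self-contained sketch of that loop with a made-up quadratic loss (KLD, pairwise_distance and Parameter_Server are project-specific and not reproduced here):

import autograd.numpy as np
from autograd import value_and_grad

def loss(w):
    # made-up quadratic loss, minimized at w == 1
    return np.sum((w - 1.0) ** 2)

loss_and_grad = value_and_grad(loss)
w = np.zeros(5)
for t in range(100):
    value, grad = loss_and_grad(w)   # one call returns both f(w) and df/dw
    w = w - 0.1 * grad               # plain gradient-descent step
print(value, w)                      # value shrinks toward 0, w toward 1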
Example #3
def test_value_and_grad():
    fun = lambda x: np.sum(np.sin(x)**2)
    dfun = grad(fun)
    dfun_both = value_and_grad(fun)
    x = npr.randn(5)
    check_equivalent(fun(x), dfun_both(x)[0])
    check_equivalent(dfun(x), dfun_both(x)[1])
Example #4
File: lols.py Project: hal3/aglols
def value_and_grad(learner, trainingData, weights, extraObjective=None):
    def trainIt(weights):
        global globalObjective
        globalObjective += learner.train(trainingData, weights)
        if extraObjective is not None:
            globalObjective += extraObjective(weights)
        return globalObjective
    return autograd.value_and_grad(trainIt)
Example #5
    def max_likelihood(self, data, weights=None, stats=None, lmbda=0.1):
        """
        As an alternative to MCMC with Polya-gamma augmentation,
        we also implement maximum likelihood learning via gradient
        descent with autograd. This follows the pybasicbayes
        convention.

        :param data: list of tuples, (x,y), for each dataset.
        :param weights: Not used in this implementation.
        :param stats: Not used in this implementation.
        """
        import autograd.numpy as anp
        from autograd import value_and_grad, hessian_vector_product
        from scipy.optimize import minimize

        assert weights is None
        assert stats is None
        if not isinstance(data, list):
            assert isinstance(data, tuple) and len(data) == 2
            data = [data]

        # Define a helper function for the log of the logistic fn
        def loglogistic(psi):
            return psi - anp.log(1+anp.exp(psi))

        # optimize each row of A and b
        for n in range(self.D_out):

            # Define an objective function for the n-th row of hstack((A, b))
            # This is the negative log likelihood of the n-th column of data.
            def nll(abn):
                an, bn = abn[:-1], abn[-1]
                T = 0
                ll = 0
                for (x, y) in data:
                    T += x.shape[0]
                    yn = y[:, n]
                    psi = anp.dot(x, an) + bn
                    ll += anp.sum(yn * loglogistic(psi))
                    ll += anp.sum((1 - yn) * loglogistic(-1. * psi))

                # Include a penalty on the weights
                ll -= lmbda * T * anp.sum(an**2)
                ll -= lmbda * T * bn**2

                return -1 * ll / T

            abn0 = np.concatenate((self.A[n], self.b[n]))
            res = minimize(value_and_grad(nll), abn0,
                           tol=1e-3,
                           method="Newton-CG",
                           jac=True,
                           hessp=hessian_vector_product(nll))

            assert res.success
            self.A[n] = res.x[:-1]
            self.b[n] = res.x[-1]
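A stripped-down sketch of the optimizer call above: SciPy's Newton-CG takes the (value, gradient) pair from value_and_grad via jac=True and exact Hessian-vector products via hessp. The convex objective below is a toy stand-in for nll:

import autograd.numpy as anp
from autograd import value_and_grad, hessian_vector_product
from scipy.optimize import minimize

def toy_nll(w):
    # strictly convex stand-in objective
    return anp.sum((w - 3.0) ** 2) + 0.1 * anp.sum(w ** 4)

w0 = anp.zeros(4)
res = minimize(value_and_grad(toy_nll), w0,
               tol=1e-3,
               method="Newton-CG",
               jac=True,                                # fun returns (value, grad)
               hessp=hessian_vector_product(toy_nll))   # autograd-supplied Hessian-vector products
print(res.success, res.x)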
Example #6
def test_return_both():
    fun = lambda x : 3.0 * x**3.2
    d_fun = grad(fun)
    f_and_d_fun = value_and_grad(fun)

    test_x = 1.7
    f, d = f_and_d_fun(test_x)
    assert f == fun(test_x)
    assert d == d_fun(test_x)
Example #7
def test_value_and_grad():
    fun = lambda x: np.sum(np.sin(x)**2)
    dfun = grad(fun)
    dfun_both = value_and_grad(fun)
    x = npr.randn(5)
    assert not isbox(dfun_both(x)[0])
    check_equivalent(fun(x), dfun_both(x)[0])
    check_equivalent(dfun(x), dfun_both(x)[1])

    def fun2(x): return dfun_both(x)[0]
    check_grads(fun2)(x)
Example #8
def test_comparison_values():
    compare_funs = [lambda x, y : np.sum(x <  x),
                    lambda x, y : np.sum(x <= y),
                    lambda x, y : np.sum(x >  y),
                    lambda x, y : np.sum(x >= y),
                    lambda x, y : np.sum(x == y),
                    lambda x, y : np.sum(x != y)]

    for arg1, arg2 in arg_pairs():
        for fun in compare_funs:
            fun_val = fun(arg1, arg2)
            fun_val_from_grad, _ = value_and_grad(fun)(arg1, arg2)
            assert fun_val == fun_val_from_grad, (fun_val, fun_val_from_grad)
Example #9
    def choose_next_point(domain_min, domain_max, acquisition_function, num_tries=15, rs=npr.RandomState(0)):
        """Uses gradient-based optimization to find next query point."""
        init_points = rs.rand(num_tries, D) * (domain_max - domain_min) + domain_min

        grad_obj = value_and_grad(lambda x: -acquisition_function(x))
        def optimize_point(init_point):
            print('.', end='')
            result = minimize(grad_obj, x0=init_point, jac=True, method='L-BFGS-B',
                              options={'maxiter': 10}, bounds=list(zip(domain_min, domain_max)))
            return result.x, acquisition_function(result.x)
        optimized_points, optimized_values = list(zip(*list(map(optimize_point, init_points))))
        print()
        best_ix = np.argmax(optimized_values)
        return np.atleast_2d(optimized_points[best_ix])
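The same negate-and-minimize idea in isolation, with a made-up one-dimensional acquisition function (the real acquisition_function, D and rs come from the surrounding Bayesian-optimization code):

import autograd.numpy as np
from autograd import value_and_grad
from scipy.optimize import minimize

def acquisition_function(x):
    return -np.sum((x - 0.3) ** 2)   # toy surrogate, peaked at x = 0.3

grad_obj = value_and_grad(lambda x: -acquisition_function(x))
result = minimize(grad_obj, x0=np.array([0.9]), jac=True, method='L-BFGS-B',
                  options={'maxiter': 10}, bounds=[(0.0, 1.0)])
print(result.x)   # close to 0.3, the maximizer of the acquisition function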
Example #10
    def _M_step(self, free_vars, resp, alpha,
                free_vars_shape, fixed_vars, is_fixed_vars, priors,
                optim_opts={},
                **kwargs):

        # inconvenient reshaping of responsibilities
        responsibs = ([item[:, i] for item in resp]
                      for i in range(len(self._ifix)))

        Cg = self.latentforces[0].kernel(self.ttc[:, None])
        Cg[np.diag_indices_from(Cg)] += 1e-5
        Lg = np.linalg.cholesky(Cg)
        Cginv = cho_solve((Lg, True), np.eye(Lg.shape[0]))

        rr = [*responsibs]

        def _objfunc(arg):
            g, vbeta, mu_ivp = _var_mixer(arg, free_vars_shape, fixed_vars, is_fixed_vars)

            # some reshaping
            beta = vbeta.reshape((self.dim.R+1, self.dim.D))
            mu_ivp = mu_ivp.reshape((len(self._ifix),
                                     len(self.Y_train_),
                                     self.dim.K))
            vals = []
            for i, ifx in enumerate(self._ifix):
                vals.append(
                    self.forward_error(g, beta, alpha, mu_ivp[i], ifx, rr[i]))

            logprior = -0.5*np.dot(g, np.dot(Cginv, g))

            for vn, x in zip(['beta'], [vbeta]):
                try:
                    prior_logpdf = priors[vn]
                    logprior = logprior + prior_logpdf(x)
                except KeyError:
                    pass
            return np.sum(vals) - logprior

        res = minimize(autograd.value_and_grad(_objfunc),
                       free_vars,
                       jac=True, **optim_opts)
        return res.x
Example #11
def test_value_and_multigrad():
    def complicated_fun(a,b,c,d,e,f=1.1, g=9.0):
        return a + np.sin(b) + np.cosh(c) + np.cos(d) + np.tan(e) + f + g

    A = 0.5
    B = -0.3
    C = 0.2
    D = -1.1
    E = 0.7
    F = 0.6
    G = -0.1

    dfun = grad(complicated_fun, argnum=[3, 1])
    dfun_both = value_and_grad(complicated_fun, argnum=[3, 1])

    check_equivalent(complicated_fun(A, B, C, D, E, f=F, g=G),
                     dfun_both(A, B, C, D, E, f=F, g=G)[0])

    check_equivalent(dfun(A, B, C, D, E, f=F, g=G),
                     dfun_both(A, B, C, D, E, f=F, g=G)[1])
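A compact, self-contained variant of the pattern tested above: passing a list to argnum makes value_and_grad return the value together with a tuple of gradients, one per listed argument and in the listed order (toy_nll and its data are made up):

import autograd.numpy as np
from autograd import value_and_grad

def toy_nll(W, b, x, y):
    logits = np.dot(x, W) + b
    log_softmax = logits - np.log(np.sum(np.exp(logits), axis=1, keepdims=True))
    return -np.sum(y * log_softmax)

x = np.random.randn(5, 4)
y = np.eye(3)[[0, 1, 2, 0, 1]]        # one-hot targets
W = 0.1 * np.random.randn(4, 3)
b = np.zeros(3)

nll, (W_grad, b_grad) = value_and_grad(toy_nll, argnum=[0, 1])(W, b, x, y)
print(nll, W_grad.shape, b_grad.shape)   # scalar, (4, 3), (3,)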
Example #12
    def distance_from_target_image(smoke):
        return np.mean((target - smoke)**2)

    def convert_param_vector_to_matrices(params):
        vx = np.reshape(params[:(rows*cols)], (rows, cols))
        vy = np.reshape(params[(rows*cols):], (rows, cols))
        return vx, vy

    def objective(params):
        init_vx, init_vy = convert_param_vector_to_matrices(params)
        final_smoke = simulate(init_vx, init_vy, init_smoke, simulation_timesteps)
        return distance_from_target_image(final_smoke)

    # Specify gradient of objective function using autograd.
    objective_with_grad = value_and_grad(objective)

    fig = plt.figure(figsize=(8,8))
    ax = fig.add_subplot(111, frameon=False)

    def callback(params):
        init_vx, init_vy = convert_param_vector_to_matrices(params)
        simulate(init_vx, init_vy, init_smoke, simulation_timesteps, ax)

    print("Optimizing initial conditions...")
    result = minimize(objective_with_grad, init_dx_and_dy, jac=True, method='CG',
                      options={'maxiter':25, 'disp':True}, callback=callback)

    print("Rendering optimized flow...")
    init_vx, init_vy = convert_param_vector_to_matrices(result.x)
    simulate(init_vx, init_vy, init_smoke, simulation_timesteps, ax, render=True)
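Stripped of the fluid-simulation specifics (simulate, target, init_smoke and the rest are defined elsewhere in that script), the call reduces to the usual value_and_grad plus scipy.optimize.minimize pattern; a self-contained sketch with a toy objective:

import autograd.numpy as np
from autograd import value_and_grad
from scipy.optimize import minimize

def objective(params):
    # toy stand-in for distance_from_target_image(simulate(...))
    return np.mean((params - 0.5) ** 2)

def callback(params):
    print("current objective:", objective(params))

objective_with_grad = value_and_grad(objective)
result = minimize(objective_with_grad, np.zeros(10), jac=True, method='CG',
                  options={'maxiter': 25, 'disp': True}, callback=callback)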
Example #13
        sum_loss_M += loss_M(Y, X, U, b)
        diff_U = U - sigma_u * np.eye(U.shape[0])
        sum_reg_u += np.linalg.norm(diff_U, ord='fro')**2

    Rx = lambda_x * np.sum((np.linalg.norm(X, axis=1)**2))
    Ru = lambda_u * sum_reg_u
    return Rx + Ru + sum_loss_M


# %%
def paramsX_to_minimize(params, Ys, Us, b):
    X = params.reshape((int(params.size / D), D))
    return loss_all(Ys, X, Us, b)


paramsX_to_minimize_with_grad = value_and_grad(paramsX_to_minimize)


# %%
def paramsUs_to_minimize(params, Ys, X, b):
    return loss_all(Ys, X, params, b)


paramsUs_to_minimize_with_grad = value_and_grad(paramsUs_to_minimize)


# %%
def transform_clusters(raw_clusters):
    '''
    input [
           [['item1','item3'],['item2','item4']],
Example #14
                    self.locationMaps, 1)
                self.graphic.update(
                    'forward time: {0} of {1}'.format(t, self.nTime),
                    self.pausetime)


measurements = []

for lExp in range(-4, 5):
    myProblem = parabolicProblem()
    myProblem.l = 10**lExp
    print("Regularization is {0}".format(myProblem.l))
    x = 0.0 * np.copy(
        myProblem.referenceRHS[0::myProblem.nDofs])  #np.zeros(myProblem.nTime)

    myProWithGrad = value_and_grad(myProblem)

    #print "value_and_grad: " + str(myProWithGrad(x))
    #print x0
    #x0[0::myProblem.nDofs] = np.ones(myProblem.nTime)*0.001

    x, f, d = scipy.optimize.fmin_l_bfgs_b(myProWithGrad,
                                           x,
                                           fprime=None,
                                           args=(),
                                           approx_grad=0,
                                           bounds=None,
                                           m=10,
                                           factr=1e0,
                                           pgtol=1e-20,
                                           iprint=1,
Example #15
                ]),
                alpha=.15,
                fc='Blue',
                ec='None')

        # Show samples from posterior.
        rs = npr.RandomState(0)
        sampled_funcs = rs.multivariate_normal(pred_mean, pred_cov, size=10)
        ax.plot(plot_xs, sampled_funcs.T)

        ax.plot(X, y, 'kx')
        ax.set_ylim([-1.5, 1.5])
        ax.set_xticks([])
        ax.set_yticks([])
        plt.draw()
        plt.pause(20.0 / 60.0)

    # Initialize covariance parameters
    rs = npr.RandomState(0)
    init_params = 0.1 * rs.randn(num_params)

    import pdb
    pdb.set_trace()

    print("Optimizing covariance parameters...")
    cov_params = minimize(value_and_grad(objective),
                          init_params,
                          jac=True,
                          method='CG',
                          callback=callback)
    plt.pause(10.0)
Example #16
 def _value_and_grad(x, i):
     v, g = value_and_grad(unflatten(x), i)
     return v, flatten(g)[0]
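The wrapper above flattens structured parameters so a generic optimizer can treat them as a single vector; in its original context, value_and_grad appears to be a closure that also takes an iteration index. A sketch of the same flatten/unflatten round trip using autograd's own helper (assuming autograd.misc.flatten is where flatten comes from):

import autograd.numpy as np
from autograd import value_and_grad
from autograd.misc.flatten import flatten

params = {'w': np.ones((2, 3)), 'b': np.zeros(3)}
flat_params, unflatten = flatten(params)          # dict -> 1-D array plus inverse map

def loss(p):
    return np.sum(p['w'] ** 2) + np.sum(p['b'] ** 2)

def flat_loss_and_grad(x):
    v, g = value_and_grad(loss)(unflatten(x))     # differentiate w.r.t. the dict
    return v, flatten(g)[0]                       # flatten the gradient to match x

value, grad = flat_loss_and_grad(flat_params)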
Example #17
    def _fit_stochastic_em(self,
                           optimizer,
                           datas,
                           inputs,
                           masks,
                           tags,
                           num_epochs=100,
                           **kwargs):
        """
        Replace the M-step of EM with a stochastic gradient update using the ELBO computed
        on a minibatch of data.
        """
        M = len(datas)
        T = sum([data.shape[0] for data in datas])

        # A helper to grab a minibatch of data
        perm = [np.random.permutation(M) for _ in range(num_epochs)]

        def _get_minibatch(itr):
            epoch = itr // M
            m = itr % M
            i = perm[epoch][m]
            return datas[i], inputs[i], masks[i], tags[i]

        # Define the objective (negative ELBO)
        def _objective(params, itr):
            # Grab a minibatch of data
            data, input, mask, tag = _get_minibatch(itr)
            Ti = data.shape[0]

            # E step: compute expected latent states with current parameters
            Ez, Ezzp1, _ = self.expected_states(data, input, mask, tag)

            # M step: set the parameter and compute the (normalized) objective function
            self.params = params
            pi0 = self.init_state_distn.initial_state_distn
            log_Ps = self.transitions.log_transition_matrices(
                data, input, mask, tag)
            log_likes = self.observations.log_likelihoods(
                data, input, mask, tag)

            # Compute the expected log probability
            # (Scale by number of length of this minibatch.)
            obj = self.log_prior()
            obj += np.sum(Ez[0] * np.log(pi0)) * M
            obj += np.sum(Ezzp1 * log_Ps) * (T - M) / (Ti - 1)
            obj += np.sum(Ez * log_likes) * T / Ti
            assert np.isfinite(obj)

            return -obj / T

        # Set up the progress bar
        lls = [-_objective(self.params, 0) * T]
        pbar = trange(num_epochs * M)
        pbar.set_description("Epoch {} Itr {} LP: {:.1f}".format(
            0, 0, lls[-1]))

        # Run the optimizer
        step = dict(sgd=sgd_step, rmsprop=rmsprop_step,
                    adam=adam_step)[optimizer]
        state = None
        for itr in pbar:
            self.params, val, g, state = step(value_and_grad(_objective),
                                              self.params, itr, state,
                                              **kwargs)
            epoch = itr // M
            m = itr % M
            lls.append(-val * T)
            pbar.set_description("Epoch {} Itr {} LP: {:.1f}".format(
                epoch, m, lls[-1]))
            pbar.update(1)

        return lls
Example #18
    def initialize(self,
                   base_model,
                   datas,
                   inputs=None,
                   masks=None,
                   tags=None,
                   num_em_iters=50,
                   num_tr_iters=50):

        print("Initializing...")
        print("First with FA using {} steps of EM.".format(num_em_iters))
        fa, xhats, Cov_xhats, lls = factor_analysis_with_imputation(
            self.D, datas, masks=masks, num_iters=num_em_iters)

        if self.D == 1 and base_model.transitions.__class__.__name__ == "DDMTransitions":

            d_init = np.mean([y[0:3] for y in datas], axis=(0, 1))
            u_sum = np.array([np.sum(u) for u in inputs])
            y_end = np.array([y[-3:] for y in datas])
            u_l, u_u = np.percentile(
                u_sum, [20, 80])  # use 20th and 80th percentile input
            y_U = y_end[np.where(u_sum >= u_u)]
            y_L = y_end[np.where(u_sum <= u_l)]
            C_init = (1.0 / 2.0) * np.mean(
                (np.mean(y_U, axis=0) - np.mean(y_L, axis=0)), axis=0)

            self.Cs = C_init.reshape([1, self.N, self.D])
            self.ds = d_init.reshape([1, self.N])
            self.inv_etas = np.log(fa.sigmasq).reshape([1, self.N])

        else:

            # define objective
            Td = sum([x.shape[0] for x in xhats])

            def _objective(params, itr):
                new_datas = [np.dot(x, params[0].T) + params[1] for x in xhats]
                obj = base_model.log_likelihood(new_datas, inputs=inputs)
                return -obj / Td

            # initialize R and r
            R = 0.1 * np.random.randn(self.D, self.D)
            r = 0.01 * np.random.randn(self.D)
            params = [R, r]

            print(
                "Next by transforming latents to match AR-HMM prior using {} steps of max log likelihood."
                .format(num_tr_iters))
            state = None
            lls = [-_objective(params, 0) * Td]
            pbar = trange(num_tr_iters)
            pbar.set_description("Epoch {} Itr {} LP: {:.1f}".format(
                0, 0, lls[-1]))

            for itr in pbar:
                params, val, g, state = sgd_step(value_and_grad(_objective),
                                                 params, itr, state)
                lls.append(-val * Td)
                pbar.set_description("LP: {:.1f}".format(lls[-1]))
                pbar.update(1)

            R = params[0]
            r = params[1]

            # scale x's to be max at 1.1
            for d in range(self.D):
                x_transformed = [(np.dot(x, R.T) + r)[:, d] for x in xhats]
                max_x = np.max(x_transformed)
                R[d, :] *= 1.1 / max_x
                r[d] *= 1.1 / max_x

            self.Cs = (fa.W @ np.linalg.inv(R)).reshape([1, self.N, self.D])
            self.ds = fa.mean - fa.W @ np.linalg.inv(R) @ r
            self.inv_etas = np.log(fa.sigmasq).reshape([1, self.N])
Example #19
 def optimize_gp_params(init_params, X, y):
     log_hyperprior = lambda params: np.sum(norm.logpdf(params, 0., 100.))
     objective = lambda params: -log_marginal_likelihood(params, X, y) -log_hyperprior(params)
     return minimize(value_and_grad(objective), init_params, jac=True, method='CG').x
Example #20
    ax_true_latents.set_title("True latents")
    ax_true_latents.set_xticks([])
    ax_true_weights.set_xticks([])
    ax_true_latents.set_yticks([])
    ax_true_weights.set_yticks([])

    def objective(params):
        weight_matrix, latents, noise_std = unpack_params(params)
        return -logprob(weight_matrix, latents, noise_std, data)/n_samples

    def callback(params):
        weights, latents, noise_std = unpack_params(params)
        print("Log likelihood {}, noise_std {}".format(-objective(params), noise_std))
        ax_est_weights.cla()
        ax_est_weights.scatter(weights[:, 0], weights[:, 1])
        ax_est_weights.set_title("Estimated weights")
        ax_est_latents.cla()
        color_scatter(ax_est_latents, latents[0, :], latents[1, :])
        ax_est_latents.set_title("Estimated latents")
        ax_est_weights.set_yticks([])
        ax_est_latents.set_yticks([])
        ax_est_weights.set_xticks([])
        ax_est_latents.set_xticks([])
        plt.draw()
        plt.pause(1.0/60.0)

    # Initialize and optimize model.
    rs = npr.RandomState(0)
    init_params = rs.randn(total_num_params)
    minimize(value_and_grad(objective), init_params, jac=True, method='CG', callback=callback)
    plt.pause(20)
Example #21
    # Build likelihood model.
    L2_reg = 1
    layer_sizes = [784, 200, 100, 10]
    num_weights, make_predictions, likelihood = make_classification_nn(
        layer_sizes)
    classifier_loglik = lambda image, c: make_predictions(
        trained_weights, np.atleast_2d(image))[:, c]

    data_L = create_prob_of_data(parameters, encoder, decoder_log_like)
    # Combine prior and likelihood.
    model_ll = lambda image, c: data_L(image) + classifier_loglik(image, c)

    def model_nll(image, c):
        return -1 * model_ll(image, c)

    model_nll_with_grad = value_and_grad(model_nll)

    # Optimize a random image to maximize this likelihood.
    cur_class = 2
    start_image = np.zeros((28 * 28))

    # quick_grad_check(data_L, start_image)


    def callback(image):
        #print "Cur loglik: ", image_prior_nll(image), "mean loglik:", image_prior_nll(all_mean)
        matplotlib.image.imsave("optimizing", image.reshape((28, 28)))

    # Optimize using conjugate gradients.
    result = minimize(model_nll_with_grad,
                      callback=callback,
Example #22
 def _step(self, optimizer, X, scalings):
     obj, grad = value_and_grad(calc_potential_energy)(scalings, X)
     scalings = optimizer.next(scalings, np.array(grad))
     scalings = normalize(scalings, xl=0, xu=scalings.max())
     return scalings, obj
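A detail the snippet above relies on: value_and_grad differentiates with respect to the first argument by default, so X is treated as a constant. A toy illustration (this calc_potential_energy is made up):

import autograd.numpy as np
from autograd import value_and_grad

def calc_potential_energy(scalings, X):
    # made-up energy; the gradient is taken w.r.t. scalings only
    return np.sum(scalings ** 2 * np.sum(X ** 2, axis=1))

scalings = np.ones(3)
X = np.random.randn(3, 2)
obj, grad = value_and_grad(calc_potential_energy)(scalings, X)
print(grad.shape)   # (3,) -- same shape as scalings; X is held fixed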
Example #23
nruns_J = int(sys.argv[5])
replicate_point = (len(sys.argv) >= 7 and sys.argv[6] == "-rep")

fn_in = dir_in + fn
fn_out = dir_out + fn

alphas,means,icf,x,wishart_gamma,wishart_m = gmm.read_gmm_instance(fn_in + ".txt", replicate_point)

start = t.time()
for i in range(nruns_f):
    err = gmm.gmm_objective(alphas,means,icf,x,wishart_gamma,wishart_m)
end = t.time()
tf = (end - start)/nruns_f

k = alphas.size
grad_gmm_objective_split_inner_wrapper = value_and_grad(gmm_objective_split_inner_wrapper)
grad_gmm_objective_split_other_wrapper = value_and_grad(gmm_objective_split_other_wrapper)
start = t.time()
for i in range(nruns_J):
    grad = grad_gmm_objective_split_other_wrapper((alphas,means,icf),x,wishart_gamma,wishart_m)
    for ix in range(x.shape[0]):
        grad = add_grad(grad,grad_gmm_objective_split_inner_wrapper((alphas,means,icf),x[ix,:]))
end = t.time()

tJ = 0
name = "Autograd_split"
if nruns_J>0:
    tJ = (end - start)/nruns_J
    gmm.write_J(fn_out + "_J_" + name + ".txt",grad[1])
    
gmm.write_times(fn_out + "_times_" + name + ".txt",tf,tJ)
Example #24
        print("Training text                         Predicted text")
        logprobs = np.asarray(pred_fun(weights, train_inputs))
        for t in range(logprobs.shape[1]):
            training_text = one_hot_to_string(train_targets[:, t, :])
            predicted_text = one_hot_to_string(logprobs[:, t, :])
            print(
                training_text.replace('\n', ' ') + "| " +
                predicted_text.replace('\n', ' '))

    def callback(weights):
        print("Train loss:", loss_fun(weights, train_inputs, train_targets))
        print_training_prediction(weights, train_inputs, train_targets)

    # Build gradient of loss function using autograd.
    loss_and_grad = value_and_grad(loss_fun)

    # Wrap function to only have one argument, for scipy.minimize.
    def training_loss_and_grad(weights):
        return loss_and_grad(weights, train_inputs, train_targets)

    init_weights = npr.randn(num_weights) * param_scale
    # Check the gradients numerically, just to be safe
    quick_grad_check(loss_fun, init_weights, (train_inputs, train_targets))

    print("Training LSTM...")
    result = minimize(training_loss_and_grad,
                      init_weights,
                      jac=True,
                      method='CG',
                      options={'maxiter': train_iters},
Example #25
fn_out = dir_out + fn


def gmm_objective_wrapper(params, x, wishart_gamma, wishart_m):
    return gmm.gmm_objective(params[0], params[1], params[2], x, wishart_gamma, wishart_m)


alphas, means, icf, x, wishart_gamma, wishart_m = gmm.read_gmm_instance(fn_in + ".txt", replicate_point)


start = t.time()
for i in range(nruns_f):
    err = gmm.gmm_objective(alphas, means, icf, x, wishart_gamma, wishart_m)
end = t.time()
tf = (end - start) / nruns_f

k = alphas.size
grad_gmm_objective_wrapper = value_and_grad(gmm_objective_wrapper)
start = t.time()
for i in range(nruns_J):
    grad = grad_gmm_objective_wrapper((alphas, means, icf), x, wishart_gamma, wishart_m)
end = t.time()

tJ = 0
name = "Autograd"
if nruns_J > 0:
    tJ = (end - start) / nruns_J
    gmm.write_J(fn_out + "_J_" + name + ".txt", grad[1])

gmm.write_times(fn_out + "_times_" + name + ".txt", tf, tJ)
Example #26
def bealeFunction(conf):
    global line, point, path, f

    f = lambda x, y: (1.5 - x + x * y)**2 + (2.25 - x + x * y**2)**2 + (
        2.625 - x + x * y**3)**2

    xmin, xmax, xstep = -4.5, 4.5, .2
    ymin, ymax, ystep = -4.5, 4.5, .2
    x, y = np.meshgrid(np.arange(xmin, xmax + xstep, xstep),
                       np.arange(ymin, ymax + ystep, ystep))
    z = f(x, y)
    minima = np.array([3., .5])
    minima_ = minima.reshape(-1, 1)

    x0 = np.array([3., 4.])
    func = value_and_grad(lambda args: f(*args))

    path_ = [x0]

    res = minimize(func,
                   x0=x0,
                   method='Newton-CG',
                   jac=True,
                   tol=1e-20,
                   callback=make_minimize_cb(path_))

    path = np.array(path_).T

    #3D surface plot
    fig = plt.figure(figsize=(8, 5))
    ax = plt.axes(projection='3d', elev=50, azim=-50)

    ax.plot_surface(x,
                    y,
                    z,
                    norm=LogNorm(),
                    rstride=1,
                    cstride=1,
                    edgecolor='none',
                    alpha=.8,
                    cmap=plt.cm.jet)
    ax.plot(minima_[0],
            minima_[1],
            f(minima_[0], minima_[1]),
            'r*',
            markersize=10)

    line, = ax.plot([], [], [], 'b', label='Newton-CG', lw=2)
    point, = ax.plot([], [], [], 'bo')

    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')
    ax.set_zlabel('$z$')

    ax.set_xlim((xmin, xmax))
    ax.set_ylim((ymin, ymax))

    anim = animation.FuncAnimation(fig,
                                   animate,
                                   init_func=init,
                                   frames=path.shape[1],
                                   interval=60,
                                   repeat_delay=5,
                                   blit=True)
    anim.save('basic_animation.mp4', fps=30, extra_args=['-vcodec', 'libx264'])

    return
Example #27
def experiment(sname, seed, nystr=False):
    def LMO_err(params, M=2):
        al, bl = np.exp(params)
        L = bl * bl * np.exp(-L0 / al / al / 2) + 1e-6 * EYEN
        if nystr:
            tmp_mat = L @ eig_vec_K
            C = L - tmp_mat @ np.linalg.inv(eig_vec_K.T @ tmp_mat / N2 +
                                            inv_eig_val_K) @ tmp_mat.T / N2
            c = C @ W_nystr_Y * N2
        else:
            LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
            C = L @ LWL_inv @ L / N2
            c = C @ W @ Y * N2
        c_y = c - Y
        lmo_err = 0
        N = 0
        for ii in range(1):
            permutation = np.random.permutation(X.shape[0])
            for i in range(0, X.shape[0], M):
                indices = permutation[i:i + M]
                K_i = W[np.ix_(indices, indices)] * N2
                C_i = C[np.ix_(indices, indices)]
                c_y_i = c_y[indices]
                b_y = np.linalg.inv(np.eye(M) - C_i @ K_i) @ c_y_i
                # print(I_CW_inv.shape,c_y_i.shape)
                lmo_err += b_y.T @ K_i @ b_y
                N += 1
        return lmo_err[0, 0] / N / M**2

    def callback0(params, timer=None):
        global Nfeval, prev_norm, opt_params, opt_test_err
        if Nfeval % 1 == 0:
            n_params = len(params)
            al, bl = np.exp(params)
            L = bl * bl * np.exp(-L0 / al / al / 2) + 1e-6 * EYEN
            if nystr:
                tmp_mat = eig_vec_K.T @ L
                alpha = EYEN - eig_vec_K @ np.linalg.inv(
                    tmp_mat @ eig_vec_K / N2 + inv_eig_val_K) @ tmp_mat / N2
                alpha = alpha @ W_nystr_Y * N2
            else:
                LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
                alpha = LWL_inv @ L @ W @ Y
            test_L = bl * bl * np.exp(-test_L0 / al / al / 2)  # l(test_X,X,al,bl)
            pred_mean = test_L @ alpha
            if timer:
                return
            test_err = ((pred_mean - test_G)**2).mean()  # ((pred_mean-test_G)**2/np.diag(pred_cov)).mean()+(np.log(np.diag(pred_cov))).mean()
            norm = alpha.T @ L @ alpha
        Nfeval += 1
        if prev_norm is not None:
            if norm[0, 0] / prev_norm >= 3:
                if opt_params is None:
                    opt_test_err = test_err
                    opt_params = params
                print(True, opt_params, opt_test_err, prev_norm, norm[0, 0])
                raise Exception

        if prev_norm is None or norm[0, 0] <= prev_norm:
            prev_norm = norm[0, 0]
        opt_test_err = test_err
        opt_params = params
        print('params,test_err, norm: ', opt_params, opt_test_err, prev_norm,
              norm[0, 0])

    folder = ROOT_PATH + "/MMR_IVs/results/mendelian/" + sname + "/"
    os.makedirs(folder, exist_ok=True)
    train, dev, test = load_data(ROOT_PATH + "/data/mendelian/" + sname +
                                 '.npz',
                                 Torch=False)

    X = train.x
    Y = train.y
    Z = train.z
    test_X = test.x
    test_G = test.g

    t0 = time.time()
    EYEN = np.eye(X.shape[0])
    N2 = X.shape[0]**2
    W = np.load(ROOT_PATH +
                '/mendelian_precomp/{}_train_K.npy'.format(sname)) / N2
    L0, test_L0 = _sqdist(X, None), _sqdist(test_X, X)

    params0 = np.random.randn(2) / 10
    bounds = None  # [[0.01,10],[0.01,5]]
    if nystr:
        for _ in range(seed + 1):
            random_indices = np.sort(
                np.random.choice(range(W.shape[0]), nystr_M, replace=False))
        eig_val_K, eig_vec_K = nystrom_decomp(W * N2, random_indices)
        inv_eig_val_K = np.diag(1 / eig_val_K / N2)
        W_nystr = eig_vec_K @ np.diag(eig_val_K) @ eig_vec_K.T / N2
        W_nystr_Y = W_nystr @ Y

    obj_grad = value_and_grad(lambda params: LMO_err(params))
    try:
        res = minimize(obj_grad,
                       x0=params0,
                       bounds=bounds,
                       method='L-BFGS-B',
                       jac=True,
                       options={'maxiter': 5000},
                       callback=callback0)
    except Exception as e:
        print(e)
    PATH = ROOT_PATH + "/MMR_IVs/results/mendelian/" + sname + "/"
    np.save(PATH + 'LMO_errs_{}_nystr_{}.npy'.format(seed, train.x.shape[0]),
            [opt_params, prev_norm, opt_test_err])
Example #28
def experiment(sname, seed, nystr=True):
    def LMO_err(params, M=2, verbal=False):
        global Nfeval
        params = np.exp(params)
        al, bl = params[:-1], params[-1]  # params[:int(n_params/2)], params[int(n_params/2):]  # [np.exp(e) for e in params]
        if train.x.shape[1] < 5:
            train_L = bl**2 * np.exp(-train_L0 / al**2 / 2) + 1e-4 * EYEN
        else:
            train_L, dev_L = 0, 0
            for i in range(len(al)):
                train_L += train_L0[i] / al[i]**2
            train_L = bl * bl * np.exp(-train_L / 2) + 1e-4 * EYEN

        tmp_mat = train_L @ eig_vec_K
        C = train_L - tmp_mat @ np.linalg.inv(eig_vec_K.T @ tmp_mat / N2 +
                                              inv_eig_val) @ tmp_mat.T / N2
        c = C @ W_nystr_Y * N2
        c_y = c - train.y
        lmo_err = 0
        N = 0
        for ii in range(1):
            permutation = np.random.permutation(train.x.shape[0])
            for i in range(0, train.x.shape[0], M):
                indices = permutation[i:i + M]
                K_i = train_W[np.ix_(indices, indices)] * N2
                C_i = C[np.ix_(indices, indices)]
                c_y_i = c_y[indices]
                b_y = np.linalg.inv(np.eye(M) - C_i @ K_i) @ c_y_i
                lmo_err += b_y.T @ K_i @ b_y
                N += 1
        return lmo_err[0, 0] / M**2

    def callback0(params):
        global Nfeval, prev_norm, opt_params, opt_test_err
        if Nfeval % 1 == 0:
            params = np.exp(params)
            print('params:', params)
            al, bl = params[:-1], params[-1]

            if train.x.shape[1] < 5:
                train_L = bl**2 * np.exp(-train_L0 / al**2 / 2) + 1e-4 * EYEN
                test_L = bl**2 * np.exp(-test_L0 / al**2 / 2)
            else:
                train_L, test_L = 0, 0
                for i in range(len(al)):
                    train_L += train_L0[i] / al[i]**2
                    test_L += test_L0[i] / al[i]**2
                train_L = bl * bl * np.exp(-train_L / 2) + 1e-4 * EYEN
                test_L = bl * bl * np.exp(-test_L / 2)

            if nystr:
                tmp_mat = eig_vec_K.T @ train_L
                alpha = EYEN - eig_vec_K @ np.linalg.inv(
                    tmp_mat @ eig_vec_K / N2 + inv_eig_val) @ tmp_mat / N2
                alpha = alpha @ W_nystr_Y * N2
            else:
                LWL_inv = chol_inv(train_L @ train_W @ train_L + train_L / N2 +
                                   JITTER * EYEN)
                alpha = LWL_inv @ train_L @ train_W @ train.y
            pred_mean = test_L @ alpha
            test_err = ((pred_mean - test.g)**2).mean()
            norm = alpha.T @ train_L @ alpha
        Nfeval += 1
        if prev_norm is not None:
            if norm[0, 0] / prev_norm >= 3:
                if opt_test_err is None:
                    opt_test_err = test_err
                    opt_params = params
                print(True, opt_params, opt_test_err, prev_norm, norm[0, 0])
                raise Exception

        if prev_norm is None or norm[0, 0] <= prev_norm:
            prev_norm = norm[0, 0]
        opt_test_err = test_err
        opt_params = params
        print(True, opt_params, opt_test_err, prev_norm, norm[0, 0])

    train, dev, test = load_data(ROOT_PATH + '/data/' + sname + '/main.npz')
    del dev

    # avoid same indices when run on the cluster
    for _ in range(seed + 1):
        random_indices = np.sort(
            np.random.choice(range(train.x.shape[0]), nystr_M, replace=False))

    EYEN = np.eye(train.x.shape[0])
    N2 = train.x.shape[0]**2

    # precompute to save time on parallized computation
    if train.z.shape[1] < 5:
        ak = get_median_inter_mnist(train.z)
    else:
        ak = np.load(ROOT_PATH + '/mnist_precomp/{}_ak.npy'.format(sname))
    train_W = np.load(ROOT_PATH +
                      '/mnist_precomp/{}_train_K0.npy'.format(sname))
    train_W = (np.exp(-train_W / ak / ak / 2) + np.exp(
        -train_W / ak / ak / 200) + np.exp(-train_W / ak / ak * 50)) / 3 / N2
    if train.x.shape[1] < 5:
        train_L0 = _sqdist(train.x, None)
        test_L0 = _sqdist(test.x, train.x)
    else:
        L0s = np.load(ROOT_PATH + '/mnist_precomp/{}_Ls.npz'.format(sname))
        train_L0 = L0s['train_L0']
        # dev_L0 = L0s['dev_L0']
        test_L0 = L0s['test_L0']
        del L0s
    if train.x.shape[1] < 5:
        params0 = np.random.randn(2) * 0.1
    else:
        params0 = np.random.randn(len(train_L0) + 1) * 0.1
    bounds = None
    eig_val_K, eig_vec_K = nystrom_decomp(train_W * N2, random_indices)
    W_nystr_Y = eig_vec_K @ np.diag(eig_val_K) @ eig_vec_K.T @ train.y / N2
    inv_eig_val = np.diag(1 / eig_val_K / N2)
    obj_grad = value_and_grad(lambda params: LMO_err(params))
    res = minimize(obj_grad,
                   x0=params0,
                   bounds=bounds,
                   method='L-BFGS-B',
                   jac=True,
                   options={
                       'maxiter': 5000,
                       'disp': True,
                       'ftol': 0
                   },
                   callback=callback0)
    PATH = ROOT_PATH + "/MMR_IVs/results/" + sname + "/"
    os.makedirs(PATH, exist_ok=True)
    np.save(PATH + 'LMO_errs_{}_nystr.npy'.format(seed),
            [opt_params, prev_norm, opt_test_err])
Example #29
        # Show posterior marginals.
        plot_xs = np.reshape(np.linspace(-7, 7, 300), (300,1))
        pred_mean, pred_cov = predict(params, X, y, plot_xs)
        marg_std = np.sqrt(np.diag(pred_cov))
        ax.plot(plot_xs, pred_mean, 'b')
        ax.fill(np.concatenate([plot_xs, plot_xs[::-1]]),
                np.concatenate([pred_mean - 1.96 * marg_std,
                               (pred_mean + 1.96 * marg_std)[::-1]]),
                alpha=.15, fc='Blue', ec='None')

        # Show samples from posterior.
        rs = npr.RandomState(0)
        sampled_funcs = rs.multivariate_normal(pred_mean, pred_cov, size=10)
        ax.plot(plot_xs, sampled_funcs.T)

        ax.plot(X, y, 'kx')
        ax.set_ylim([-1.5, 1.5])
        ax.set_xticks([])
        ax.set_yticks([])
        plt.draw()
        plt.pause(1.0/60.0)

    # Initialize covariance parameters
    rs = npr.RandomState(0)
    init_params = 0.1 * rs.randn(num_params)

    print("Optimizing covariance parameters...")
    cov_params = minimize(value_and_grad(objective), init_params, jac=True,
                          method='CG', callback=callback)
    plt.pause(10.0)
Example #30
    Fhat = forward_pass(W1, W2, W3, b1, b2, b3, x)
    # ########
    # Note that this function assumes a Gaussian likelihood (with variance 1)
    # You must modify this function to consider a categorical (generalized Bernoulli) likelihood
    # ########
    # nll = 0.5*np.sum(np.square(Fhat - y)) + 0.5*y.size*np.log(2.*np.pi)  # Gaussian likelihood

    #Implementation of Categorical (Generalized Bernoulli) Likelihood
    #Compute Inner Product Vector
    inner_prod_v = np.einsum('ij, ij->i', Fhat, y)
    nll = np.sum(inner_prod_v)
    nnll = -1 * nll
    return nnll


nll_gradients = value_and_grad(negative_log_likelihood,
                               argnum=[0, 1, 2, 3, 4, 5])
"""
    returns the output of `negative_log_likelihood` as well as the gradient of the 
    output with respect to all weights and biases
    Inputs:
        same as negative_log_likelihood (W1, W2, W3, b1, b2, b3, x, y)
    Outputs: (nll, (W1_grad, W2_grad, W3_grad, b1_grad, b2_grad, b3_grad))
        nll : output of `negative_log_likelihood`
        W1_grad : (M, 784) gradient of the nll with respect to the weights of first (hidden) layer
        W2_grad : (M, M) gradient of the nll with respect to the weights of second (hidden) layer
        W3_grad : (10, M) gradient of the nll with respect to the weights of third (output) layer
        b1_grad : (M, 1) gradient of the nll with respect to the biases of first (hidden) layer
        b2_grad : (M, 1) gradient of the nll with respect to the biases of second (hidden) layer
        b3_grad : (10, 1) gradient of the nll with respect to the biases of third (output) layer
     """
Example #31
    sampler_params = np.zeros(len(parser))
    parser.put(sampler_params, 'mean', init_mean)
    parser.put(sampler_params, 'log_stddev', init_stddevs)
    parser.put(sampler_params, 'log_stepsizes', init_log_stepsizes)
    parser.put(sampler_params, 'log_noise_sizes', init_log_noise_sizes)

    def get_batch_marginal_likelihood_estimate(sampler_params):
        samples, marginal_likelihood_estimates = sample_and_run_langevin(
            sampler_params, rs, num_samples)
        matplotlib.image.imsave("optimizing", (samples[0, :].reshape(
            (28, 28))).value)

        return np.mean(marginal_likelihood_estimates)

    ml_and_grad = value_and_grad(get_batch_marginal_likelihood_estimate)

    # Optimize Langevin parameters.
    # for i in xrange(num_sampler_optimization_steps):
    #     ml, dml = ml_and_grad(sampler_params)
    #     print "log marginal likelihood:", ml
    #     plot_sampler_params(sampler_params, 'sampler_params.png')
    #     sampler_params = sampler_params + sampler_learn_rate * dml
    #     print 'dml norm', np.linalg.norm(dml)
    #     print 'dml max', np.max(dml)
    #     fig = plt.figure()
    #     fig.clf()
    #     ax = fig.add_subplot(111)
    #     ax.plot(dml[-(2*num_langevin_steps):-1],'o')
    #     plt.savefig('dml.png')
Example #32
        logprobs = np.asarray(pred_fun(weights, train_inputs))
        for t in range(logprobs.shape[1]):
            training_text  = one_hot_to_string(train_inputs[:,t,:])
            predicted_text = one_hot_to_string(logprobs[:,t,:])
            print(training_text.replace('\n', ' ') + "|" + predicted_text.replace('\n', ' '))

    # Wrap function to only have one argument, for scipy.minimize.
    def training_loss(weights):
        return -loglike_fun(weights, train_inputs, train_inputs)

    def callback(weights):
        print("Train loss:", training_loss(weights))
        print_training_prediction(weights)

    # Build gradient of loss function using autograd.
    training_loss_and_grad = value_and_grad(training_loss)

    init_weights = npr.randn(num_weights) * param_scale
    # Check the gradients numerically, just to be safe
    quick_grad_check(training_loss, init_weights)

    print("Training LSTM...")
    result = minimize(training_loss_and_grad, init_weights, jac=True, method='CG',
                      options={'maxiter':train_iters}, callback=callback)
    trained_weights = result.x

    print("\nGenerating text from LSTM model...")
    num_letters = 30
    for t in range(20):
        text = ""
        for i in range(num_letters):
Example #33
File: lds.py Project: yahmadian/ssm
    def _fit_variational_em(self,
                            variational_posterior,
                            datas,
                            inputs,
                            masks,
                            tags,
                            learning=True,
                            alpha=.75,
                            optimizer="adam",
                            num_iters=100,
                            **kwargs):
        """
        Let gamma denote the emission parameters and theta denote the transition
        and initial discrete state parameters. This is a mix of EM and SVI:
            1. Sample x ~ q(x; phi)
            2. Compute L(x, theta') = E_p(z | x, theta)[log p(x, z; theta')]
            3. Set theta = (1 - alpha) theta + alpha * argmax L(x, theta')
            4. Set gamma = gamma + eps * nabla log p(y | x; gamma)
            5. Set phi = phi + eps * dx/dphi * d/dx [L(x, theta) + log p(y | x; gamma) - log q(x; phi)]
        """
        # Optimize the standard ELBO when updating gamma (emissions params)
        # and phi (variational params)
        T = sum([data.shape[0] for data in datas])

        def _objective(params, itr):
            if learning:
                self.emissions.params, variational_posterior.params = params
            else:
                variational_posterior.params = params

            obj = self._surrogate_elbo(variational_posterior, datas, inputs,
                                       masks, tags, **kwargs)
            return -obj / T

        # Initialize the parameters
        if learning:
            params = (self.emissions.params, variational_posterior.params)
        else:
            params = variational_posterior.params

        # Set up the progress bar
        elbos = [-_objective(params, 0) * T]
        pbar = trange(num_iters)
        pbar.set_description("Surrogate ELBO: {:.1f}".format(elbos[0]))

        # Run the optimizer
        step = dict(sgd=sgd_step, rmsprop=rmsprop_step,
                    adam=adam_step)[optimizer]
        state = None
        for itr in pbar:
            # Update the emission and variational posterior parameters
            params, val, g, state = step(value_and_grad(_objective), params,
                                         itr, state)
            elbos.append(-val * T)

            # Update progress bar
            pbar.set_description("Surrogate ELBO: {:.1f}".format(elbos[-1]))
            pbar.update()

        # Save the final emission and variational parameters
        if learning:
            self.emissions.params, variational_posterior.params = params
        else:
            variational_posterior.params = params

        return elbos
Example #34
def experiment(sname, seed, datasize, nystr=False, args=None):
    np.random.seed(1)
    random.seed(1)

    def LMO_err(params, M=10):
        np.random.seed(2)
        random.seed(2)
        al, bl = np.exp(params)
        L = bl * bl * np.exp(-L0 / al / al / 2) * np.exp(
            -L1 / al / al / 2) + 1e-6 * EYEN
        if nystr:
            tmp_mat = L @ eig_vec_K
            C = L - tmp_mat @ np.linalg.inv(eig_vec_K.T @ tmp_mat / N2 +
                                            inv_eig_val_K) @ tmp_mat.T / N2
            c = C @ W_nystr_Y * N2
        else:
            LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
            C = L @ LWL_inv @ L / N2
            c = C @ W @ Y * N2
        c_y = c - Y
        lmo_err = 0
        N = 0
        for ii in range(1):
            permutation = np.random.permutation(X.shape[0])
            for i in range(0, X.shape[0], M):
                indices = permutation[i:i + M]
                K_i = W[np.ix_(indices, indices)] * N2
                C_i = C[np.ix_(indices, indices)]
                c_y_i = c_y[indices]
                b_y = np.linalg.inv(np.eye(M) - C_i @ K_i) @ c_y_i
                lmo_err += b_y.T @ K_i @ b_y
                N += 1
        return lmo_err[0, 0] / N / M**2

    def callback0(params, timer=None):
        global Nfeval, prev_norm, opt_params, opt_test_err
        np.random.seed(3)
        random.seed(3)
        if Nfeval % 1 == 0:
            al, bl = params
            L = bl * bl * np.exp(-L0 / al / al / 2) * np.exp(
                -L1 / al / al / 2) + 1e-6 * EYEN
            if nystr:
                alpha = EYEN - eig_vec_K @ np.linalg.inv(
                    eig_vec_K.T @ L @ eig_vec_K / N2 +
                    np.diag(1 / eig_val_K / N2)) @ eig_vec_K.T @ L / N2
                alpha = alpha @ W_nystr @ Y * N2
            else:
                LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
                alpha = LWL_inv @ L @ W @ Y
                # L_W_inv = chol_inv(W*N2+L_inv)
            test_L = bl * bl * np.exp(-test_L0 / al / al / 2) * np.exp(
                -test_L1 / al / al / 2)
            pred_mean = test_L @ alpha
            if timer:
                return
            test_err = ((pred_mean - test_Y)**2).mean()  # ((pred_mean-test_Y)**2/np.diag(pred_cov)).mean()+(np.log(np.diag(pred_cov))).mean()
            norm = alpha.T @ L @ alpha

        Nfeval += 1
        if prev_norm is not None:
            if norm[0, 0] / prev_norm >= 3:
                if opt_params is None:
                    opt_test_err = test_err
                    opt_params = params
                print(True, opt_params, opt_test_err, prev_norm)
                raise Exception

        if prev_norm is None or norm[0, 0] <= prev_norm:
            prev_norm = norm[0, 0]
        opt_test_err = test_err
        opt_params = params
        print('params,test_err, norm: ', opt_params, opt_test_err, prev_norm)

    def get_causal_effect(params, do_A, w):
        "to be called within experiment function."
        np.random.seed(4)
        random.seed(4)
        al, bl = params
        L = bl * bl * np.exp(-L0 / al / al / 2) * np.exp(
            -L1 / al / al / 2) + 1e-6 * EYEN
        if nystr:
            alpha = EYEN - eig_vec_K @ np.linalg.inv(
                eig_vec_K.T @ L @ eig_vec_K / N2 +
                np.diag(1 / eig_val_K / N2)) @ eig_vec_K.T @ L / N2
            alpha = alpha @ W_nystr @ Y * N2
        else:
            LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
            alpha = LWL_inv @ L @ W @ Y
            # L_W_inv = chol_inv(W*N2+L_inv)

        EYhat_do_A = []
        for a in do_A:
            a = np.repeat(a, [w.shape[0]]).reshape(-1, 1)
            w = w.reshape(-1, 1)
            aw = np.concatenate([a, w], axis=-1)
            ate_L0 = _sqdist(aw[:, 0:1], X[:, 0:1])
            ate_L1 = _sqdist(aw[:, 1:2], X[:, 1:2])
            ate_L = bl * bl * np.exp(-ate_L0 / al / al / 2) * np.exp(
                -ate_L1 / al / al / 2)
            h_out = ate_L @ alpha

            mean_h = np.mean(h_out).reshape(-1, 1)
            EYhat_do_A.append(mean_h)
            print('a = {}, beta_a = {}'.format(np.mean(a), mean_h))

        return np.concatenate(EYhat_do_A)

    # train,dev,test = load_data(ROOT_PATH+'/data/zoo/{}_{}.npz'.format(sname,datasize))

    # X = np.vstack((train.x,dev.x))
    # Y = np.vstack((train.y,dev.y))
    # Z = np.vstack((train.z,dev.z))
    # test_X = test.x
    # test_Y = test.g
    t1 = time.time()
    train, dev, test = load_data(ROOT_PATH + "/data/zoo/" + sname +
                                 '/main_{}.npz'.format(args.sem))
    # train, dev, test = train[:300], dev[:100], test[:100]
    t2 = time.time()
    print('t2 - t1 = ', t2 - t1)
    Y = np.concatenate((train.y, dev.y), axis=0).reshape(-1, 1)
    # test_Y = test.y
    AZ_train, AW_train = bundle_az_aw(train.a, train.z, train.w)
    AZ_test, AW_test = bundle_az_aw(test.a, test.z, test.w)
    AZ_dev, AW_dev = bundle_az_aw(dev.a, dev.z, test.w)

    X, Z = np.concatenate((AW_train, AW_dev), axis=0), np.concatenate(
        (AZ_train, AZ_dev), axis=0)
    test_X, test_Y = AW_test, test.y.reshape(-1, 1)  # TODO: is test.g just test.y?

    t3 = time.time()
    print('t3 - t2', t3 - t2)
    EYEN = np.eye(X.shape[0])
    ak0, ak1 = get_median_inter_mnist(Z[:, 0:1]), get_median_inter_mnist(Z[:, 1:2])
    N2 = X.shape[0]**2
    W0, W1 = _sqdist(Z[:, 0:1], None), _sqdist(Z[:, 1:2], None)
    print('av kernel indicator: ', args.av_kernel)
    W = np.exp(-W0 / ak0 / ak0 / 2) * np.exp(-W1 / ak1 / ak1 / 2) / N2 if not args.av_kernel \
        else (np.exp(-W0 / ak0 / ak0 / 2) + np.exp(-W0 / ak0 / ak0 / 200) + np.exp(
        -W0 / ak0 / ak0 * 50)) / 3 / N2 * (np.exp(-W1 / ak1 / ak1 / 2) + np.exp(-W1 / ak1 / ak1 / 200) + np.exp(
        -W1 / ak1 / ak1 * 50)) / 3

    # W = (np.exp(-W0 / ak0 / ak0 / 2) + np.exp(-W0 / ak0 / ak0 / 200) + np.exp(
    #     -W0 / ak0 / ak0 * 50)) / 3 / N2 * (np.exp(-W1 / ak1 / ak1 / 2) + np.exp(-W1 / ak1 / ak1 / 200) + np.exp(
    #     -W1 / ak1 / ak1 * 50)) / 3   # TODO: recompute W for my case
    del W0, W1
    L0, test_L0 = _sqdist(X[:, 0:1], None), _sqdist(test_X[:, 0:1], X[:, 0:1])
    L1, test_L1 = _sqdist(X[:, 1:2], None), _sqdist(test_X[:, 1:2], X[:, 1:2])
    t4 = time.time()
    print('t4 - t3', t4 - t3)
    # measure time
    # callback0(np.random.randn(2)/10,True)
    # np.save(ROOT_PATH + "/MMR_IVs/results/zoo/" + sname + '/LMO_errs_{}_nystr_{}_time.npy'.format(seed,train.x.shape[0]),time.time()-t0)
    # return

    # params0 = np.random.randn(2)  # /10
    params0 = np.array([1., 1.])
    print('starting param: ', params0)
    bounds = None  # [[0.01,10],[0.01,5]]
    if nystr:
        for _ in range(seed + 1):
            random_indices = np.sort(
                np.random.choice(range(W.shape[0]), nystr_M, replace=False))
        eig_val_K, eig_vec_K = nystrom_decomp(W * N2, random_indices)
        inv_eig_val_K = np.diag(1 / eig_val_K / N2)
        W_nystr = eig_vec_K @ np.diag(eig_val_K) @ eig_vec_K.T / N2
        W_nystr_Y = W_nystr @ Y

    t5 = time.time()
    print('t5 - t4', t5 - t4)
    obj_grad = value_and_grad(lambda params: LMO_err(params))
    try:
        res = minimize(obj_grad,
                       x0=params0,
                       bounds=bounds,
                       method='L-BFGS-B',
                       jac=True,
                       options={'maxiter': 5000},
                       callback=callback0)
    # res stands for results (not residuals!).
    except Exception as e:
        print(e)

    PATH = ROOT_PATH + "/MMR_IVs/results/zoo/" + sname + "/"
    if not os.path.exists(PATH + str(date.today())):
        os.mkdir(PATH + str(date.today()))

    assert opt_params is not None
    params = opt_params
    do_A = np.load(ROOT_PATH + "/data/zoo/" + sname +
                   '/do_A_{}.npz'.format(args.sem))['do_A']
    EY_do_A_gt = np.load(ROOT_PATH + "/data/zoo/" + sname +
                         '/do_A_{}.npz'.format(args.sem))['gt_EY_do_A']
    w_sample = train.w
    EYhat_do_A = get_causal_effect(params=params, do_A=do_A, w=w_sample)
    plt.figure()
    plt.plot([i + 1 for i in range(20)], EYhat_do_A)
    plt.xlabel('A')
    plt.ylabel('EYdoA-est')
    plt.savefig(
        os.path.join(
            PATH, str(date.today()),
            'causal_effect_estimates_nystr_{}'.format(AW_train.shape[0]) +
            '.png'))
    plt.close()
    print('ground truth ate: ', EY_do_A_gt)
    visualise_ATEs(EY_do_A_gt,
                   EYhat_do_A,
                   x_name='E[Y|do(A)] - gt',
                   y_name='beta_A',
                   save_loc=os.path.join(PATH, str(date.today())) + '/',
                   save_name='ate_{}_nystr.png'.format(AW_train.shape[0]))
    causal_effect_mean_abs_err = np.mean(np.abs(EY_do_A_gt - EYhat_do_A))
    causal_effect_mae_file = open(
        os.path.join(PATH, str(date.today()),
                     "ate_mae_{}_nystrom.txt".format(AW_train.shape[0])), "a")
    causal_effect_mae_file.write(
        "mae_: {}\n".format(causal_effect_mean_abs_err))
    causal_effect_mae_file.close()

    os.makedirs(PATH, exist_ok=True)
    np.save(
        os.path.join(
            PATH, str(date.today()),
            'LMO_errs_{}_nystr_{}.npy'.format(seed, AW_train.shape[0])),
        [opt_params, prev_norm, opt_test_err])
Example #35
        reproj_err[i] = compute_reproj_err(cams[obs[i, 0]], X[obs[i, 1]], w[i],
                                           feats[i])

    w_err = 1. - np.square(w)

    return (reproj_err, w_err)


########## derivative extras #############


def compute_w_err(w):
    return 1. - w * w


compute_w_err_d = value_and_grad(compute_w_err)


def compute_reproj_err_wrapper(params, feat):
    X_off = BA_NCAMPARAMS
    return compute_reproj_err(params[0:X_off], params[X_off:X_off + 3],
                              params[-1], feat)


compute_reproj_err_d = jacobian(compute_reproj_err_wrapper)


def compute_ba_J(cams, X, w, obs, feats):
    p = obs.shape[0]
    reproj_err_d = []
    for i in range(p):
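The two derivative helpers above are not interchangeable: value_and_grad needs a scalar-valued function, while jacobian is the right tool for vector-valued outputs such as the reprojection error. A toy contrast (both functions below are made up):

import autograd.numpy as np
from autograd import value_and_grad, jacobian

def scalar_err(w):
    return 1. - w * w                  # scalar output -> value_and_grad applies

A = np.array([[1.0, 2.0], [3.0, 4.0]])

def vector_err(p):
    return np.dot(A, p)                # vector output -> use jacobian instead

val, dval = value_and_grad(scalar_err)(0.5)      # (0.75, -1.0)
J = jacobian(vector_err)(np.array([1.0, 2.0]))   # J equals A, shape (2, 2)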
Example #36
    def _fit_bbvi(self,
                  variational_posterior,
                  datas,
                  inputs,
                  masks,
                  tags,
                  verbose=2,
                  learning=True,
                  optimizer="adam",
                  num_iters=100,
                  **kwargs):
        """
        Fit with black box variational inference using a
        Gaussian approximation for the latent states x_{1:T}.
        """
        # Define the objective (negative ELBO)
        T = sum([data.shape[0] for data in datas])

        def _objective(params, itr):
            if learning:
                self.params, variational_posterior.params = params
            else:
                variational_posterior.params = params

            obj = self._bbvi_elbo(variational_posterior, datas, inputs, masks,
                                  tags)
            return -obj / T

        # Initialize the parameters
        if learning:
            params = (self.params, variational_posterior.params)
        else:
            params = variational_posterior.params

        # Set up the progress bar
        elbos = [-_objective(params, 0) * T]
        pbar = ssm_pbar(num_iters, verbose, "LP: {:.1f}", [elbos[0]])

        # Run the optimizer
        step = dict(sgd=sgd_step, rmsprop=rmsprop_step,
                    adam=adam_step)[optimizer]
        state = None
        for itr in pbar:
            params, val, g, state = step(value_and_grad(_objective), params,
                                         itr, state)
            elbos.append(-val * T)

            # TODO: Check for convergence -- early stopping

            # Update progress bar
            if verbose == 2:
                pbar.set_description("ELBO: {:.1f}".format(elbos[-1]))
                pbar.update()

        # Save the final parameters
        if learning:
            self.params, variational_posterior.params = params
        else:
            variational_posterior.params = params

        return np.array(elbos)
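# A minimal, self-contained sketch of the optimization pattern used in
# _fit_bbvi above: value_and_grad(objective) is evaluated once per iteration
# and the gradient is handed to a step rule. A plain gradient-descent update
# stands in for ssm's sgd_step/rmsprop_step/adam_step helpers, and the toy
# quadratic objective is an illustrative assumption, not part of the model.
import autograd.numpy as anp
from autograd import value_and_grad

def toy_objective(params, itr):
    return anp.sum((params - 3.0) ** 2)

obj_and_grad = value_and_grad(toy_objective)
params = anp.zeros(5)
for itr in range(100):
    val, g = obj_and_grad(params, itr)
    params = params - 0.1 * g  # stand-in for step(value_and_grad(_objective), params, itr, state)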
Example #37
    ax_smart_full = fig.add_subplot(322, frameon=False)
    ax_smart_one = fig.add_subplot(324, frameon=False)
    ax_smart_two = fig.add_subplot(326, frameon=False)
    plt.show(block=False) 

    for initialization in initialization_set:

        init_params = .1 * npr.randn(total_num_params)
        deep_map = create_deep_map(init_params)
        if initialization:
            init_params = initialize(deep_map, X, num_pseudo_params)

        print("Optimizing covariance parameters...")
        objective = lambda params: -log_likelihood(params,X,y,n_samples)

        params = minimize(value_and_grad(objective), init_params, jac=True,
                              method='BFGS', callback=callback,options={'maxiter':1000})
        
        params = params['x']
        plot_xs = np.reshape(np.linspace(-5, 5, 300), (300,1))
        if initialization:
            ax_full = ax_smart_full
            ax_one = ax_smart_one
            ax_two = ax_smart_two
            title = "Two Layers, Smart Initialization"
        else:
            ax_full = ax_random_full
            ax_one = ax_random_one
            ax_two = ax_random_two  
            title = "Two Layers, Random Initialization"
        
Example #38
    return minimize_cb


def init():
    line.set_data([], [])
    point.set_data([], [])
    return line, point


def animate(i):
    line.set_data(*path[::, :i])
    point.set_data(*path[::, i - 1:i])
    return line, point


func = value_and_grad(lambda args: f(*args))

res = minimize(func,
               x0=x0,
               method='Newton-CG',
               jac=True,
               tol=1e-20,
               callback=make_minimize_cb(path_))

path = np.array(path_).T
path.shape

fig, ax = plt.subplots(figsize=(10, 6))

ax.contour(x,
           y,
def grad_rho(parameters, X_data, Y_data, sample_indices, kernel_keyword= "RBF", reg = 0.000001):
    grad_K = value_and_grad(rho, 1)
    rho_value, gradient = grad_K(parameters, X_data, Y_data, sample_indices, kernel_keyword, reg = reg)
    return rho_value, gradient
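# A sketch of the argnum form used by grad_rho above: value_and_grad(fun, 1)
# returns fun's value together with the gradient with respect to fun's second
# argument. The toy function below is an assumption for illustration only.
import autograd.numpy as np
from autograd import value_and_grad

def weighted_sq_norm(scale, x):
    return scale * np.sum(x ** 2)

val_and_dx = value_and_grad(weighted_sq_norm, 1)  # differentiate w.r.t. x, not scale
value, dx = val_and_dx(2.0, np.array([1.0, 2.0, 3.0]))
# value == 28.0, dx == array([4., 8., 12.])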
Example #40
    def __init__(self, NSIDE, npix, clv=True):
        """
        Args:
            NSIDE (int) : the healpix NSIDE parameter, must be a power of 2, less than 2**30
            npix (int) : number of pixel in the X and Y axis of the final projected map
            rot_velocity (float) : rotation velocity of the star in the equator in km/s
        
        Returns:
            None
        """
        self.NSIDE = int(NSIDE)
        self.npix = int(npix)
        self.hp_npix = hp.nside2npix(NSIDE)

        # self.rot_velocity = rot_velocity
        self.clv = clv

# Generate the indices of all healpix pixels
        self.indices = np.arange(hp.nside2npix(NSIDE), dtype='int')
        self.n_healpix_pxl = len(self.indices)

# Define the orthographic projector that generates the maps of the star on the plane of the sky
        self.projector = hp.projector.OrthographicProj(xsize=int(self.npix))

# This function returns the pixel associated with a vector (x,y,z). This is needed by the projector
        self.f_vec2pix = lambda x, y, z: hp.pixelfunc.vec2pix(int(self.NSIDE), x, y, z)

# Generate a mesh grid of X and Y points in the plane of the sky that covers only the observed hemisphere of the star
        x = np.linspace(-2.0,0.0,int(self.npix/2))
        y = np.linspace(-1.0,1.0,int(self.npix/2))
        X, Y = np.meshgrid(x,y)

# Rotational velocity vector (pointing in the z direction and unit vector)
        omega = np.array([0,0,1])

# Compute the radial vector at each position in the map and the projected velocity on the plane of the sky
        radial_vector = np.array(self.projector.xy2vec(X.flatten(), Y.flatten())).reshape((3,int(self.npix/2),int(self.npix/2)))        
        self.vel_projection = np.cross(omega[:,None,None], radial_vector, axisa=0, axisb=0)[:,:,0]

# Compute the mu angle (astrocentric angle)
        self.mu_angle = radial_vector[0,:,:]        
        
# Read all Kurucz models from the database. Hardwired temperature and mu angles
        print("Reading Kurucz spectra...")
        self.T = 3500 + 250 * np.arange(27)
        self.mus = np.array([1.0,0.9,0.8,0.7,0.6,0.5,0.4,0.3,0.2,0.1,0.05,0.02])[::-1]

        for i in tqdm(range(27)):
            f = 'kurucz_models/RESULTS/T_{0:d}_logg4.0_feh0.0.spec'.format(self.T[i])
            vel, _, spec = _read_kurucz_spec(f)

            if (i == 0):
                self.nlambda, self.nmus = spec.shape
                self.velocity = np.zeros((self.nlambda))
                self.spectrum = np.zeros((27,self.nmus,self.nlambda))

            self.velocity = vel
            self.spectrum[i,:,:] = spec[:,::-1].T

# Generate a fake temperature map in the star using spherical harmonics
        # self.temperature_map = 5000 * np.ones(self.npix)
        # self.temperature_map = 5000 + 250 * hp.sphtfunc.alm2map(np.ones(10,dtype='complex'),self.NSIDE) #np.random.rand(self.n_healpix_pxl) * 2000 + 5000 #

        self.temperature_map = 5000 * np.ones(self.hp_npix)
        self.coeffs = hp.sphtfunc.map2alm(self.temperature_map)

        self.velocity_per_pxl = self.velocity[1] - self.velocity[0]

        self.freq_grid = np.fft.fftfreq(self.nlambda)

        self.gradient = value_and_grad(self.loss)
		k_xx = calcSigma(x,x,l)
		marg_data = 0.5* np.dot(y.T,np.dot(np.linalg.inv(k_xx+ (noise_var**2)*np.identity(k_xx.shape[0])),y)) - 0.5 * \
			np.log(np.linalg.det(np.linalg.inv(k_xx+ (noise_var**2)*np.identity(k_xx.shape[0])))) - (len(y)*0.5) * np.log(2*np.pi) 

		return -1.0*marg_data


	###################################
	####         Gradient          ####
	###################################

	g_ml = lambda l: marg_likelihood(x_train,y_train,l)

	init_params = 0.1 * rs.randn(num_params)
	grad_ml = grad(g_ml)
	cov_params = minimize(value_and_grad(g_ml),init_params,jac=True,
						method = 'CG')

	print marg_likelihood(x_train,y_train,length_scale)
	print grad_ml(length_scale)
	print "Initial Parameters: ", init_params
	print "Optimized Parameters: ", cov_params.x

	opt_length_scale = np.exp(cov_params.x[0])

	import pdb
	pdb.set_trace()

	Omg = np.linalg.inv( K + ((noise_var/2.)**2*np.identity(n_train)) )
	Beta = np.dot(Omg,y_train).reshape((-1,1))
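# For reference, a numerically stabler way to write the negative log marginal
# likelihood above is via a Cholesky factorization instead of explicit inverses
# and determinants. This is a hedged sketch with an assumed RBF kernel (not the
# snippet's calcSigma); it differs from the code above only by sign conventions
# and additive constants, and its value_and_grad can be fed to minimize(..., jac=True).
import autograd.numpy as np
from autograd import value_and_grad

def rbf_kernel(x1, x2, length_scale):
    d2 = (x1[:, None] - x2[None, :]) ** 2
    return np.exp(-0.5 * d2 / length_scale ** 2)

def gp_neg_log_marginal_likelihood(log_length_scale, x, y, noise_var):
    K = rbf_kernel(x, x, np.exp(log_length_scale)) + noise_var * np.eye(len(x))
    L = np.linalg.cholesky(K)
    alpha = np.linalg.solve(L.T, np.linalg.solve(L, y))
    return (0.5 * np.dot(y, alpha)
            + np.sum(np.log(np.diag(L)))
            + 0.5 * len(y) * np.log(2 * np.pi))

nll_and_grad = value_and_grad(gp_neg_log_marginal_likelihood)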
Example #42
    def distance_from_target_image(smoke):
        return np.mean((target - smoke)**2)

    def convert_param_vector_to_matrices(params):
        vx = np.reshape(params[:(rows*cols)], (rows, cols))
        vy = np.reshape(params[(rows*cols):], (rows, cols))
        return vx, vy

    def objective(params):
        init_vx, init_vy = convert_param_vector_to_matrices(params)
        final_smoke = simulate(init_vx, init_vy, init_smoke, simulation_timesteps)
        return distance_from_target_image(final_smoke)

    # Specify gradient of objective function using autograd.
    objective_with_grad = value_and_grad(objective)

    fig = plt.figure(figsize=(8,8))
    ax = fig.add_subplot(111, frameon=False)

    def callback(params):
        init_vx, init_vy = convert_param_vector_to_matrices(params)
        simulate(init_vx, init_vy, init_smoke, simulation_timesteps, ax)

    print("Optimizing initial conditions...")
    result = minimize(objective_with_grad, init_dx_and_dy, jac=True, method='CG',
                      options={'maxiter':25, 'disp':True}, callback=callback)

    print("Rendering optimized flow...")
    init_vx, init_vy = convert_param_vector_to_matrices(result.x)
    simulate(init_vx, init_vy, init_smoke, simulation_timesteps, ax, render=True)
Example #43
def run_expt(config, loss_opt=0):
    ttl = config_to_str(config)
    print '\nstarting experiment {}'.format(ttl)
    print config
    
    Xtrain, Ytrain, params_true, true_fun, fun_name = demo.make_data_linreg_1d(config['N'], config['fun_type'])
    data_dim = Xtrain.shape[1]
    N = Xtrain.shape[0]
    Xtrain, Ytrain = opt.shuffle_data(Xtrain, Ytrain)
        
    model_type = config['model_type']
    if model_type == 'linear':
        model = LinregModel(data_dim, add_ones=True)
        params, loss = model.ols_fit(Xtrain, Ytrain)
    elif model_type[0:3] == 'mlp':
        _, layer_sizes = model_type.split(':')
        layer_sizes = [int(n) for n in layer_sizes.split('-')]
        model = MLP(layer_sizes, 'regression', L2_reg=0.001, Ntrain=N) 
    else:
        raise ValueError('unknown model type {}'.format(model_type))
            
    initial_params = model.init_params() 
    param_dim = len(initial_params)

    plot_data = (data_dim == 1)
    plot_params = (param_dim == 2)
    nplots = 2
    if plot_data: 
        nplots += 1
    if plot_params:
        nplots += 1
    plot_rows, plot_cols = util.nsubplots(nplots)
    
    if config['optimizer'] == 'BFGS':
        obj_fun = lambda params: model.PNLL(params, Xtrain, Ytrain)
        logger = opt.OptimLogger(lambda params, iter: obj_fun(params), store_freq=1, print_freq=10)   
        params = opt.bfgs(autograd.value_and_grad(obj_fun), initial_params, logger.callback, config['num_epochs'])
                     
    if config['optimizer'] == 'SGD':
        B = config['batch_size']
        M = N / B # num_minibatches_per_epoch (num iter per epoch)
        max_iters = config['num_epochs'] * M
        
        grad_fun = opt.build_batched_grad(model.gradient, config['batch_size'], Xtrain, Ytrain)
        #obj_fun = opt.build_batched_grad(model.PNLL, config['batch_size'], Xtrain, Ytrain)
        obj_fun = lambda params, iter: model.PNLL(params, Xtrain, Ytrain)
        logger = opt.OptimLogger(obj_fun, store_freq=M, print_freq=M*10, store_params=plot_params)         
            
        if config.has_key('lr_fun'):
            if config['lr_fun'] == 'exp':
                lr_fun = lambda iter: opt.lr_exp_decay(iter, config['init_lr'], config['lr_decay']) 
            elif config['lr_fun'] == 'const':
                lr_fun = opt.const_lr(config['init_lr']) 
            else:
                raise ValueError('Unknown lr_fun {}'.format(config['lr_fun']))

        #sgd_fun = config['sgd-fun']
        #params = sgd_fun(grad_fun, initial_params, logger.callback, \
        #    max_iters, lr_fun, *config['args'])
        
        if config['sgd-method'] == 'momentum':
            params = opt.sgd(grad_fun, initial_params, logger.callback, \
            max_iters, lr_fun, config['mass'])
        elif config['sgd-method'] == 'RMSprop':
            params = opt.rmsprop(grad_fun, initial_params, logger.callback, \
                max_iters, lr_fun, config['grad_sq_decay'])
        elif config['sgd-method'] == 'ADAM':
            params = opt.adam(grad_fun, initial_params, logger.callback, \
                max_iters, lr_fun, config['grad_decay'], config['grad_sq_decay'])
        elif config['sgd-method'] == 'AutoADAM':
            eval_fn = lambda params: model.PNLL(params, Xtrain, Ytrain)
            params, lr, scores = opt.autoadam(grad_fun, initial_params, logger.callback, \
                max_iters, eval_fn, config['auto-method'])
            config['init_lr'] = lr
            config['lr_fun'] = 'const'
            ttl = config_to_str(config)
            print 'autoadam: chose {:0.3f} as lr'.format(lr)
            print scores
        else:
            raise ValueError('Unknown SGD method {}'.format(config['sgd-method']))
        
            

    training_loss = model.PNLL(params, Xtrain, Ytrain)
    print 'finished fitting, training loss {:0.3f}, {} obj calls, {} grad calls'.\
        format(training_loss, model.num_obj_fun_calls, model.num_grad_fun_calls)
    
    fig = plt.figure()
    ax = fig.add_subplot(plot_rows, plot_cols, 1)
    opt.plot_loss_trace(logger.obj_trace, loss_opt, ax)
    ax.set_title('final objective {:0.3f}'.format(training_loss))
    ax.set_xlabel('epochs')
    
    ax = fig.add_subplot(plot_rows, plot_cols, 2)
    ax.plot(logger.grad_norm_trace)
    ax.set_title('gradient norm vs num updates')
    
    if plot_data:
        ax = fig.add_subplot(plot_rows, plot_cols, 3)
        predict_fun = lambda X: model.predictions(params, X)
        demo.plot_data_and_predictions_1d(Xtrain, Ytrain, true_fun, predict_fun, ax)
    
    if plot_params:
        ax = fig.add_subplot(plot_rows, plot_cols, 4)
        loss_fun = lambda w0, w1: model.PNLL(np.array([w0, w1]), Xtrain, Ytrain)
        demo.plot_error_surface_2d(loss_fun, params, params_true, config['fun_type'], ax)
        demo.plot_param_trace_2d(logger.param_trace, ax)        
        
    fig.suptitle(ttl)
    folder = 'figures/linreg-sgd'        
    fname = os.path.join(folder, 'linreg_1d_sgd_{}.png'.format(ttl))
    plt.savefig(fname)
    return training_loss
def RMSprop(g,w,x_train,y_train,alpha,max_its,batch_size,**kwargs): 
    verbose = True
    if 'verbose' in kwargs:
        verbose = kwargs['verbose']
       
    # rmsprop params
    gamma=0.9
    eps=10**-8
    if 'gamma' in kwargs:
        gamma = kwargs['gamma']
    if 'eps' in kwargs:
        eps = kwargs['eps']
    
    # flatten the input function, create gradient based on flat function
    g_flat, unflatten, w = flatten_func(g, w)
    grad = value_and_grad(g_flat)

    # initialize average gradient
    avg_sq_grad = np.ones(np.size(w))
    if 'avg_sq_grad' in kwargs:
        avg_sq_grad = kwargs['avg_sq_grad']
    
    # record history
    num_train = y_train.shape[1]
    w_hist = [unflatten(w)]
    train_hist = [g_flat(w,x_train,y_train,np.arange(num_train))]
    
    # how many mini-batches equal the entire dataset?
    num_batches = int(np.ceil(np.divide(num_train, batch_size)))

    # outer loop over descent steps
    for k in range(max_its):                   
        # loop over each minibatch
        start = timer()
        train_cost = 0
        for b in range(num_batches):
            # collect indices of current mini-batch
            batch_inds = np.arange(b*batch_size, min((b+1)*batch_size, num_train))
            
            # plug in value into func and derivative
            cost_eval,grad_eval = grad(w,x_train,y_train,batch_inds)
            grad_eval.shape = np.shape(w)
            
            # update exponential average of past gradients
            avg_sq_grad = gamma*avg_sq_grad + (1 - gamma)*grad_eval**2 
    
            # take descent step 
            w = w - alpha*grad_eval / (avg_sq_grad**(0.5) + eps)

        end = timer()
        
        # update training and validation cost
        train_cost = g_flat(w,x_train,y_train,np.arange(num_train))

        # record weight update, train and val costs
        w_hist.append(unflatten(w))
        train_hist.append(train_cost)

        if verbose == True:
            print ('step ' + str(k+1) + ' done in ' + str(np.round(end - start,1)) + ' secs, train cost = ' + str(np.round(train_hist[-1][0],4)))

    if verbose == True:
        print ('finished all ' + str(max_its) + ' steps')
    return w_hist,train_hist,avg_sq_grad
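# A hedged usage sketch for the RMSprop routine above. It assumes the excerpt's
# missing imports are in scope (autograd.numpy as np, value_and_grad,
# flatten_func from autograd.misc.flatten, and a timer); the least-squares cost,
# the random data, and the column layout (features x samples, as implied by
# y_train.shape[1]) are illustrative assumptions.
def least_squares(w, x, y, inds):
    # w: (D+1,) weights with bias; x: (D, N); y: (1, N); inds: mini-batch indices
    preds = w[0] + np.dot(w[1:], x[:, inds])
    return np.sum((preds - y[0, inds]) ** 2) / float(inds.size)

D, N = 3, 200
x_train = np.random.randn(D, N)
y_train = np.random.randn(1, N)
w_init = 0.1 * np.random.randn(D + 1)
w_hist, train_hist, avg_sq_grad = RMSprop(least_squares, w_init, x_train, y_train,
                                          alpha=0.01, max_its=20, batch_size=32,
                                          verbose=False)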
Example #45
        logprobs = np.asarray(pred_fun(weights, train_inputs))
        for t in range(logprobs.shape[1]):
            training_text  = one_hot_to_string(train_inputs[:,t,:])
            predicted_text = one_hot_to_string(logprobs[:,t,:])
            print(training_text.replace('\n', ' ') + "|" + predicted_text.replace('\n', ' '))

    # Wrap function to only have one argument, for scipy.minimize.
    def training_loss(weights):
        return -loglike_fun(weights, train_inputs, train_inputs)

    def callback(weights):
        print("Train loss:", training_loss(weights))
        print_training_prediction(weights)

    # Build gradient of loss function using autograd.
    training_loss_and_grad = value_and_grad(training_loss)

    init_weights = npr.randn(num_weights) * param_scale
    # Check the gradients numerically, just to be safe
    quick_grad_check(training_loss, init_weights)

    print("Training LSTM...")
    result = minimize(training_loss_and_grad, init_weights, jac=True, method='CG',
                      options={'maxiter':train_iters}, callback=callback)
    trained_weights = result.x

    print("\nGenerating text from LSTM model...")
    num_letters = 30
    for t in range(20):
        text = ""
        for i in range(num_letters):
Example #46
    data = make_pinwheel_data()
    def objective(params):
        return -log_marginal_likelihood(params, data)

    def plot_gmm(params, ax, num_points=100):
        angles = np.expand_dims(np.linspace(0, 2*np.pi, num_points), 1)
        xs, ys = np.cos(angles), np.sin(angles)
        circle_pts = np.concatenate([xs, ys], axis=1) * 2.0
        for log_proportion, mean, chol in zip(*unpack_params(params)):
            cur_pts = mean + np.dot(circle_pts, chol)
            ax.plot(cur_pts[:, 0], cur_pts[:, 1], '-')

    fig = plt.figure(figsize=(12,8), facecolor='white')
    ax = fig.add_subplot(111, frameon=False)
    plt.show(block=False)

    def callback(params):
        print("Log likelihood {}".format(-objective(params)))
        ax.cla()
        ax.plot(data[:, 0], data[:, 1], 'bx')
        ax.set_xticks([])
        ax.set_yticks([])
        plot_gmm(params, ax)
        plt.draw()
        plt.pause(1.0/60.0)

    # Initialize and optimize model.
    rs = npr.RandomState(0)
    init_params = rs.randn(num_gmm_params) * 0.1
    minimize(value_and_grad(objective), init_params, jac=True, method='CG', callback=callback)
Example #47
    p = obs.shape[0]
    reproj_err = np.empty((p,2))
    for i in range(p):
        reproj_err[i] = compute_reproj_err(cams[obs[i,0]],X[obs[i,1]],w[i],feats[i])

    w_err = 1. - np.square(w)

    return (reproj_err, w_err)

########## derivative extras #############


def compute_w_err(w):
    return 1. - w*w

compute_w_err_d = value_and_grad(compute_w_err)

def compute_reproj_err_wrapper(params,feat):
    X_off = BA_NCAMPARAMS
    return compute_reproj_err(params[0:X_off],params[X_off:X_off+3],params[-1],feat)
compute_reproj_err_d = jacobian(compute_reproj_err_wrapper)

def compute_ba_J(cams, X, w, obs, feats):
    p = obs.shape[0]
    reproj_err_d = []
    for i in range(p):
        params = np.hstack((cams[obs[i,0]],X[obs[i,1]],w[i]))
        reproj_err_d.append(compute_reproj_err_d(params,feats[i]))

    w_err_d = []
    for curr_w in w:
def optimize_newton(fo, diagonal, random_sample, var_sample, tol,
                    num_intents, num_var_samples, T, joint_sample_x, joint_sample_y,
                    var_samples_x, var_samples_y, frame, num_peds, time_array,
                    ess, top_Z_indices, robot_mu_x, robot_mu_y, ped_mu_x, ped_mu_y,
                    cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y,
                    cov_ped_x, cov_ped_y, inv_cov_ped_x, inv_cov_ped_y,
                    one_over_cov_sum_x, one_over_cov_sum_y,
                    one_over_cov_sumij_x, one_over_cov_sumij_y, normalize,
                    ll_converge, vg, hess_opt, opt_iter_robot, opt_iter_all,
                    x_ped, y_ped, agent_disrupt, robot_agent_disrupt, opt_method):
    if random_sample:
        f = [0. for _ in range(num_intents + 1)]
        ll = [0. for _ in range(num_intents + 1)]
    if var_sample:
        f = [0. for _ in range(2 * num_var_samples + 1)]
        ll = [0. for _ in range(2 * num_var_samples + 1)]

    ped_mu_x_ess = [0. for _ in range(ess)]
    ped_mu_y_ess = [0. for _ in range(ess)]

    inv_cov_ped_x_ess = [0. for _ in range(ess)]
    inv_cov_ped_y_ess = [0. for _ in range(ess)]

    one_over_cov_sum_x_ess = [0. for _ in range(ess)]
    one_over_cov_sum_y_ess = [0. for _ in range(ess)]

    one_over_std_sum_x_ess = [0. for _ in range(ess)]
    one_over_std_sum_y_ess = [0. for _ in range(ess)]

    for ped in range(ess):
        top = top_Z_indices[ped]
        ped_mu_x_ess[ped] = ped_mu_x[top]
        ped_mu_y_ess[ped] = ped_mu_y[top]

        inv_cov_ped_x_ess[ped] = inv_cov_ped_x[top]
        inv_cov_ped_y_ess[ped] = inv_cov_ped_y[top]

        one_over_cov_sum_x_ess[ped] = one_over_cov_sum_x[top]
        one_over_cov_sum_y_ess[ped] = one_over_cov_sum_y[top]

        # one_over_std_sum_x_ess[ped] = one_over_std_sum_x[top]
        # one_over_std_sum_y_ess[ped] = one_over_std_sum_y[top]
    t0 = time.time()

    if random_sample:
        for intent in range(num_intents + 1):
            if intent == 0:
                x0 = robot_mu_x
                x0 = np.concatenate((x0, robot_mu_y))
                for ped in range(ess):
                    top = top_Z_indices[ped]
                    x0 = np.concatenate((x0, ped_mu_x[top]))
                    x0 = np.concatenate((x0, ped_mu_y[top]))
            else:
                x0 = joint_sample_x[num_peds, intent - 1, :]
                x0 = np.concatenate((x0, joint_sample_y[num_peds,
                                                        intent - 1, :]))
                for ped in range(ess):
                    top = top_Z_indices[ped]
                    x0 = np.concatenate((x0, joint_sample_x[top,
                                                            intent - 1, :]))
                    x0 = np.concatenate((x0, joint_sample_y[top,
                                                            intent - 1, :]))
            if opt_iter_robot or opt_iter_all:
                f[intent] = optimize_iterate(fo, tol, diagonal, frame, x0, num_peds, ess,\
                               robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                               cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
                               cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                              one_over_cov_sum_x_ess, one_over_cov_sum_y_ess, \
                         one_over_cov_sumij_x, one_over_cov_sumij_y, \
                               normalize, ll_converge, T, opt_iter_robot, opt_iter_all)
                if diagonal:
                    ll[intent] = so_diagonal.ll(f[intent], num_peds, ess,\
                                robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                                cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
                                cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                                one_over_cov_sum_x_ess, one_over_cov_sum_y_ess, \
                            one_over_cov_sumij_x, one_over_cov_sumij_y, \
                                normalize, T)
                else:
                    if fo:
                        ll[intent] = fo_dense.ll(f[intent], num_peds, ess,\
                                   robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                                   cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
                                   cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                                   one_over_cov_sum_x_ess, one_over_cov_sum_y_ess,
                                   normalize)
                    else:
                        ll[intent] = so_dense.ll(f[intent], num_peds, ess,\
                                   robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                                   cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
                                   cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                                   one_over_cov_sum_x_ess, one_over_cov_sum_y_ess, \
                               one_over_cov_sumij_x, one_over_cov_sumij_y, \
                                   normalize, T)
            else:
                if diagonal:
                    f[intent] = sp.optimize.minimize(so_diagonal.ll, x0, \
                         args=(num_peds, ess,\
                                robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                                cov_robot_x, cov_robot_y, \
                                inv_cov_robot_x, inv_cov_robot_y, \
                                cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                                one_over_cov_sum_x_ess, one_over_cov_sum_y_ess, \
                                one_over_cov_sumij_x, one_over_cov_sumij_y, normalize, T), \
                                method=opt_method, jac=so_diagonal.d_ll, \
                                hess=so_diagonal.dd_ll, \
                          options={'xtol': tol})
                    #trust-ncg--VERY SLOW
                    #trust-krylov---VERY SLOW
                    #Newton-CG---.56 SECONDS, GOOD RESULT
                    #trust-exact---.52 seconds.
                    ll[intent] = so_diagonal.ll(f[intent].x, num_peds, ess, \
                                robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                                cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
                                cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                                one_over_cov_sum_x_ess, one_over_cov_sum_y_ess, \
                            one_over_cov_sumij_x, one_over_cov_sumij_y, \
                                normalize, T)
                else:
                    if fo:
                        f[intent] = sp.optimize.minimize(fo_dense.ll, x0, args=(num_peds, ess,\
                                   robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                                   cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
                                   cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                                   one_over_cov_sum_x_ess, one_over_cov_sum_y_ess,
                                   normalize), \
                                   method=opt_method, jac=so_dense.d_ll, hess=fo_dense.dd_ll, \
                              options={'xtol': tol})
                        ll[intent] = fo_dense.ll(f[intent].x, num_peds, ess, \
                                   robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                                   cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
                                   cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                                   one_over_cov_sum_x_ess, one_over_cov_sum_y_ess,
                                   normalize)
                    else:
                        f[intent] = sp.optimize.minimize(so_dense.ll, x0, args=(num_peds, ess,\
                                   robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                                   cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
                                   cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                                   one_over_cov_sum_x_ess, one_over_cov_sum_y_ess, \
                               one_over_cov_sumij_x, one_over_cov_sumij_y, \
                                   normalize, T), \
                                   method=opt_method, jac=so_dense.d_ll, hess=so_dense.dd_ll, \
                              options={'xtol': tol})
                        ll[intent] = so_dense.ll(f[intent].x, num_peds, ess, \
                                   robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                                   cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
                                   cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                                   one_over_cov_sum_x_ess, one_over_cov_sum_y_ess, \
                               one_over_cov_sumij_x, one_over_cov_sumij_y, \
                                   normalize, T)
            ll[intent] = math.trunc(ll[intent] * 1e3) / 1e3

            print('intent =', intent, end=" ", flush=True)
    if var_sample:
        # high_value_var_sampler(var_samples_x, var_samples_y)
        for var in range(2 * num_var_samples + 1):
            if var == 0:
                x0 = robot_mu_x
                x0 = np.concatenate((x0, robot_mu_y))
                for ped in range(ess):
                    top = top_Z_indices[ped]
                    x0 = np.concatenate((x0, ped_mu_x[top]))
                    x0 = np.concatenate((x0, ped_mu_y[top]))
            else:
                x0 = var_samples_x[num_peds, var - 1, :]
                x0 = np.concatenate((x0, var_samples_y[num_peds, var - 1, :]))
                for ped in range(ess):
                    top = top_Z_indices[ped]
                    x0 = np.concatenate((x0, var_samples_x[top, var - 1, :]))
                    x0 = np.concatenate((x0, var_samples_y[top, var - 1, :]))
            if opt_iter_robot or opt_iter_all:
                print('OPT ITER FO', fo)
                f[var] = optimize_iterate(fo, tol, diagonal, frame, x0, num_peds, ess,\
                               robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                               cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
                               cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                              one_over_cov_sum_x_ess, one_over_cov_sum_y_ess, \
                         one_over_cov_sumij_x, one_over_cov_sumij_y, \
                               normalize, ll_converge, T, opt_iter_robot, opt_iter_all)
                if diagonal:
                    ll[var] = so_diagonal.ll(f[var], num_peds, ess,\
                                robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                                cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
                                cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                                one_over_cov_sum_x_ess, one_over_cov_sum_y_ess,  \
                            one_over_cov_sumij_x, one_over_cov_sumij_y, normalize, T)
                else:
                    if fo:
                        ll[var] = fo_dense.ll(f[var], num_peds, ess,\
                                   robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                                   cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
                                   cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                                   one_over_cov_sum_x_ess, one_over_cov_sum_y_ess,  \
                               normalize)
                    else:
                        ll[var] = so_dense.ll(f[var], num_peds, ess,\
                                   robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                                   cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
                                   cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                                   one_over_cov_sum_x_ess, one_over_cov_sum_y_ess,  \
                               one_over_cov_sumij_x, one_over_cov_sumij_y, normalize, T)
            else:
                if diagonal:
                    f[var] = sp.optimize.minimize(so_diagonal.ll, x0, args=(num_peds, ess,\
                                robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                                cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
                                cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                                one_over_cov_sum_x_ess, one_over_cov_sum_y_ess,  \
                            one_over_cov_sumij_x, one_over_cov_sumij_y, normalize, T), \
                                method=opt_method, jac=so_diagonal.d_ll, \
                                hess=so_diagonal.dd_ll, \
                           options={'xtol': tol})
                    ll[var] = so_diagonal.ll(f[var].x, num_peds, ess,\
                                robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                                cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
                                cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                                one_over_cov_sum_x_ess, one_over_cov_sum_y_ess,  \
                            one_over_cov_sumij_x, one_over_cov_sumij_y, normalize, T)
                elif vg:
                    print('VALUE AND GRAD')
                    print('')
                    f[var] = sp.optimize.minimize(value_and_grad(so_dense.ll), x0, \
                           args=(num_peds, ess,\
                                robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                                cov_robot_x, cov_robot_y, \
                                inv_cov_robot_x, inv_cov_robot_y, \
                                cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                                one_over_cov_sum_x_ess, one_over_cov_sum_y_ess, \
                                one_over_cov_sumij_x, one_over_cov_sumij_y, normalize, T), \
                        jac=True, method='BFGS', options={'xtol': 1e-8, 'disp': True})
                    # f[var] = sp.optimize.minimize(so_dense.ll, x0, \
                    # 							args=(num_peds, ess,\
                    #             robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                    #       	   	 cov_robot_x, cov_robot_y, \
                    #             inv_cov_robot_x, inv_cov_robot_y, \
                    #             cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                    #             one_over_cov_sum_x_ess, one_over_cov_sum_y_ess, \
                    #             one_over_cov_sumij_x, one_over_cov_sumij_y, normalize), \
                    # 				jac=so_dense.d_ll, method='BFGS', options={'xtol': 1e-8, 'disp': True})
                    ll[var] = so_dense.ll(f[var].x, num_peds, ess,\
                                robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                                cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
                                cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                                one_over_cov_sum_x_ess, one_over_cov_sum_y_ess, \
                                one_over_cov_sumij_x, one_over_cov_sumij_y, normalize, T)
                elif hess_opt:
                    if fo:
                        print('HAND DERIVED SCIPY HESS OPT FO')
                        print('')
                        f[var] = sp.optimize.minimize(fo_dense.ll, x0, args=(num_peds, ess,\
                                  robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                                  cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
                                  cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                                  one_over_cov_sum_x_ess, one_over_cov_sum_y_ess,  \
                              normalize), \
                                  method=opt_method, jac=fo_dense.d_ll, hess=fo_dense.dd_ll, \
                            options={'xtol': tol})
                        ll[var] = fo_dense.ll(f[var].x, num_peds, ess,\
                                  robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                                  cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
                                  cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                                  one_over_cov_sum_x_ess, one_over_cov_sum_y_ess,  \
                              normalize)
                    else:
                        print('HAND DERIVED SCIPY HESS OPT SO')
                        print('')
                        f[var] = sp.optimize.minimize(so_dense.ll, x0, args=(num_peds, ess,\
                                  robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                                  cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
                                  cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                                  one_over_cov_sum_x_ess, one_over_cov_sum_y_ess,  \
                               one_over_cov_sumij_x, one_over_cov_sumij_y, normalize, T), \
                                  method=opt_method, jac=so_dense.d_ll, hess=so_dense.dd_ll, \
                            options={'xtol': tol})
                        ll[var] = so_dense.ll(f[var].x, num_peds, ess,\
                                  robot_mu_x, robot_mu_y, ped_mu_x_ess, ped_mu_y_ess, \
                                  cov_robot_x, cov_robot_y, inv_cov_robot_x, inv_cov_robot_y, \
                                  cov_ped_x, cov_ped_y, inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
                                  one_over_cov_sum_x_ess, one_over_cov_sum_y_ess,  \
                               one_over_cov_sumij_x, one_over_cov_sumij_y, normalize, T)
            if not math.isinf(ll[var]) and not math.isnan(ll[var]):
                ll[var] = math.trunc(ll[var] * 1e3) / 1e3

            print('variance sample = ', var, end=" ", flush=True)
        #######################HAND ROLLED GRAD+HESS
        # f = optimize_iterate(frame, x0, num_peds, ess,\
        # 	                 robot_mu_x, robot_mu_y, \
        # 	                 ped_mu_x_ess, ped_mu_y_ess, \
        # 	                 inv_cov_robot_x, inv_cov_robot_y, \
        # 	                 inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
        # 	                 one_over_cov_sum_x_ess, \
        # one_over_cov_sum_y_ess, \
        # 	                 one_over_std_sum_x_ess, \
        # one_over_std_sum_y_ess)
        #######################SCIPY LL+GRAD+HESS

        # f[intent] = sp.optimize.minimize(so_diagonal.ll, x0, \
        #            		 args=(num_peds, ess,\
        #                  robot_mu_x, robot_mu_y, \
        #                  ped_mu_x_ess, ped_mu_y_ess, \
        #                 	 cov_robot_x, cov_robot_y, \
        #                  inv_cov_robot_x, inv_cov_robot_y, \
        #                  cov_ped_x, cov_ped_y, \
        #                  inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
        #                  one_over_cov_sum_x_ess, one_over_cov_sum_y_ess, \
        #                  one_over_std_sum_x_ess, one_over_std_sum_y_ess), \
        #                  method=opt_method, jac=so_diagonal.d_ll, \
        # hess=so_diagonal.dd_ll)
        # 				 # options={'xtol': tol})
        # ll[intent] = so_diagonal.ll(f[intent].x, num_peds, ess,\
        # 	                 robot_mu_x, robot_mu_y, \
        # 	                 ped_mu_x_ess, ped_mu_y_ess, \
        # 	                 cov_robot_x, cov_robot_y, \
        # 	                 inv_cov_robot_x, inv_cov_robot_y, \
        # 	                 cov_ped_x, cov_ped_y, \
        # 	                 inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
        # 	                 one_over_cov_sum_x_ess, one_over_cov_sum_y_ess, \
        # 	                 one_over_std_sum_x_ess, one_over_std_sum_y_ess)
        # ll[intent] = math.trunc(ll[intent]*1e3)/1e3

        # print(intent, end =" ", flush=True)
        # newton iterate, tol=1e-8, num_peds: .138+/-.077s
        ######################### TIMING ON DIFFERENT APPROACHES
        # num_peds, Tdex_max=25,no collisions:
        # trust-krlov w/ gtol=1e-8 .708+/-.223s
        #      trust-krylov: no tol: .523+/-.192s
        # Newton-CG: xtol=1e-8: 0.701+/-.229s
        #   Newton-CG, no tol: .568+/-.243s
        # newton-cg/ess=True, xtol=1e-8, .227+/-.099
        # 	NCG/ess=True, no xtol .2+/-.098
        # Newton-CG, gtol=1e-8: 0.943+/-0.419s
        #     Newton-CG, no tol: .791+/-.288s
        # 	  Newton-CG,ess=True, .116+/-.059
        # trust-ncg, gtol=1e-8: 1.622+/-0.962s
        #  trust-ncg, no tol: .539+/-.123s
        #######################SCIPY LL+GRAD
        # f = sp.optimize.minimize(so_diagonal.ll, x0, \
        # 				args=(num_peds, ess,\
        #                  robot_mu_x, robot_mu_y, \
        #                  ped_mu_x_ess, ped_mu_y_ess, \
        #                  inv_cov_robot_x, inv_cov_robot_y, \
        #                  inv_cov_ped_x_ess, inv_cov_ped_y_ess, \
        #                  one_over_cov_sum_x_ess, \
        # one_over_cov_sum_y_ess, \
        #                  one_over_std_sum_x_ess, \
        # one_over_std_sum_y_ess), \
        #                     method='BFGS', jac=so_diagonal.d_ll, \
        #                     options={'disp': True})
        #######################SCIPY LL
        # f = sp.optimize.minimize(\
        #                 value_and_grad(ll_diag_slice_grad), x0, \
        #                 jac=True, method='BFGS',\
        # options={'xtol': 1e-8, 'disp': True})

    def coupling(f, x0, one_over_cov_sum_x, one_over_cov_sum_y):
        n = 2
        uncoupling = 0.
        for ped in range(ess):
            vel_x = f[:T] - x0[n * T:(n + 1) * T]
            vel_y = f[T:2 * T] - x0[(n + 1) * T:(n + 2) * T]
            n = n + 2

            vel_x_2 = np.power(vel_x, 2)
            vel_y_2 = np.power(vel_y, 2)

            quad_x = np.multiply(vel_x_2, np.diag(one_over_cov_sum_x[ped]))
            quad_y = np.multiply(vel_y_2, np.diag(one_over_cov_sum_y[ped]))

            Z_x = np.exp(-0.5 * quad_x)
            Z_y = np.exp(-0.5 * quad_y)

            Z = np.multiply(Z_x, Z_y)

            log_znot = np.sum(np.log1p(-Z))
            uncoupling = uncoupling + log_znot
        return -1 * uncoupling  # we want uncoupling to be large, so we return -uncoupling
        # for minimization; a large value of -uncoupling indicates strong coupling

    global_optima_dex = np.argmin(ll)
    if opt_iter_robot or opt_iter_all:
        agent_disrupt[frame] = np.linalg.norm(f[global_optima_dex][2 * T] -
                                              x0[2 * T:])
        robot_agent_disrupt[frame] = coupling(f[global_optima_dex], x0, \
                           one_over_cov_sum_x, one_over_cov_sum_y)
    else:
        agent_disrupt[frame] = np.linalg.norm(f[global_optima_dex].x[2 * T] -
                                              x0[2 * T:])
        robot_agent_disrupt[frame] = coupling(f[global_optima_dex].x, x0, \
                           one_over_cov_sum_x, one_over_cov_sum_y)

    opt_time = time.time() - t0
    time_array[frame] = opt_time
    ave_time = math.trunc(1e3 * np.mean(time_array[:frame + 1])) / 1e3
    std_time = math.trunc(1e3 * np.std(time_array[:frame + 1])) / 1e3

    return f, ll, opt_time, time_array, ave_time, std_time, \
        agent_disrupt, robot_agent_disrupt
    def fit(self, x_ND, x_valid_ND=None, verbose=True):
        ''' Fit this estimator to provided training data using LBFGS algorithm

        Args
        ----
        x_ND : 2D array, shape (N, D)
            Dataset used for training.
            Each row is an observed feature vector of size D
        x_valid_ND : 2D array, shape (Nvalid, D), optional
            Optional, dataset used for heldout validation.
            Each row is an observed feature vector of size D
            If provided, used to measure heldout likelihood at every checkpoint.
            These likelihoods will be recorded in self.history['valid_neg_log_lik_per_pixel']
        verbose : boolean, optional, defaults to True
            If provided, a message will be printed to stdout after every iteration,
            indicating the current training loss and (if possible) validation score.

        Returns
        -------
        self : this GMM object
            Internal attributes log_pi_K, mu_KD, stddev_KD updated.
            Performance metrics stored after every iteration in history 
        '''
        N = np.maximum(x_ND.shape[0], 1.0)

        ## Create history attribute to store progress at every checkpoint (every iteration)
        self.history = defaultdict(list)

        ## Create initial parameters at random, using self.seed for the random seed
        # Will always create same parameters if self.seed is the same value.
        log_pi_K, mu_KD, stddev_KD = self.generate_initial_parameters(x_ND)

        ## Package up parameters into one vector of unconstrained parameters
        init_param_vec = self.to_flat_array_of_unconstrained_parameters(
            log_pi_K, mu_KD, stddev_KD)

        ## Define loss function in terms of a single vector containing all unconstrained parameters
        # Will compute the "per pixel" or "per dimension" loss
        def calc_loss(vec_M):
            ''' Compute per-pixel loss (negative log likelihood plus penalty)

            Returns
            -------
            loss : float
            '''
            # First, take current unconstrained parameters and transform back to common parameters
            # This provided transformation is autograd-able.
            log_pi_K, mu_KD, stddev_KD = self.to_common_parameters_from_flat_array(
                vec_M)

            # Second compute the loss
            # TODO replace this placeholder!
            loss_placeholder = ag_np.sum(ag_np.square(vec_M))

            # Finally, be sure this is per-pixel loss (total num pixels = N * D)
            return loss_placeholder / (N * self.D)

        ## Define gradient in terms of single vector of unconstrained parameters
        calc_grad = autograd.grad(calc_loss)
        calc_loss_and_grad = autograd.value_and_grad(calc_loss)

        ## Define callback function for monitoring progress of gradient descent
        # Will be called at every checkpoint (after every iteration of LBFGS)
        self.callback_count = 0
        self.start_time_sec = time.time()

        def callback_update_history(cur_param_vec):
            cur_loss, cur_grad_vec = calc_loss_and_grad(cur_param_vec)
            self.history['iter'].append(self.callback_count)
            self.history['train_loss_per_pixel'].append(cur_loss)

            log_pi_K, mu_KD, stddev_KD = self.to_common_parameters_from_flat_array(
                cur_param_vec)
            if x_valid_ND is None:
                valid_neg_log_lik_msg = ""  # empty message when no validation set provided
            else:
                ## TODO compute the per-pixel negative log likelihood on validation set
                ## Use calc_negative_log_lik and x_valid_ND
                valid_neg_log_lik_per_pixel = 0.0
                valid_neg_log_lik_msg = "| valid score % 9.6f" % (
                    valid_neg_log_lik_per_pixel)
                self.history['valid_neg_log_lik_per_pixel'].append(
                    valid_neg_log_lik_per_pixel)
            if verbose:
                print("iter %4d / %4d after %9.1f sec | train loss % 9.6f %s" %
                      (self.callback_count, self.max_iter, time.time() -
                       self.start_time_sec, cur_loss, valid_neg_log_lik_msg))

            ## Track L1 norm of the gradient
            # This should slowly go to exactly zero if we have converged
            self.history['grad_norm'].append(
                np.sum(np.abs(cur_grad_vec)) / cur_grad_vec.size)

            self.callback_count += 1

        ## Perform callback on initial parameters
        # Always good to know performance at original initialization
        callback_update_history(init_param_vec)

        ## Call LBFGS routine from scipy
        # This will perform many LBFGS update iterations,
        # and after each one will perform a callback using our provided function.
        # See scipy.optimize.minimize docs for details
        result = scipy.optimize.minimize(calc_loss,
                                         init_param_vec,
                                         jac=calc_grad,
                                         method='l-bfgs-b',
                                         constraints={},
                                         callback=callback_update_history,
                                         options=dict(maxiter=self.max_iter,
                                                      ftol=self.ftol))

        ## Unpack the result of the optimization
        self.result = result
        self.message = str(result.message)
        optimal_param_vec = result.x
        self.log_pi_K, self.mu_KD, self.stddev_KD = self.to_common_parameters_from_flat_array(
            optimal_param_vec)
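# A hedged sketch of what the placeholder loss above might compute: the total
# negative log likelihood of a diagonal-covariance Gaussian mixture, written
# with autograd-friendly operations (a hand-rolled log-sum-exp). This standalone
# function only plays the role of the class's calc_negative_log_lik helper; its
# name and signature are assumptions for illustration, not the reference solution.
import autograd.numpy as ag_np

def gmm_neg_log_lik_sketch(x_ND, log_pi_K, mu_KD, stddev_KD):
    D = x_ND.shape[1]
    # normalized log mixture weights
    log_w_K = log_pi_K - ag_np.log(ag_np.sum(ag_np.exp(log_pi_K)))
    # log N(x_n | mu_k, diag(stddev_k**2)) for every (n, k) pair
    sq_dist_NKD = (x_ND[:, None, :] - mu_KD[None, :, :]) ** 2
    log_pdf_NK = (-0.5 * ag_np.sum(sq_dist_NKD / (stddev_KD[None, :, :] ** 2), axis=2)
                  - ag_np.sum(ag_np.log(stddev_KD), axis=1)[None, :]
                  - 0.5 * D * ag_np.log(2 * ag_np.pi))
    # log-sum-exp over components, written out so autograd can differentiate it
    scored_NK = log_w_K[None, :] + log_pdf_NK
    max_N1 = ag_np.max(scored_NK, axis=1, keepdims=True)
    log_lik_N = max_N1[:, 0] + ag_np.log(ag_np.sum(ag_np.exp(scored_NK - max_N1), axis=1))
    return -ag_np.sum(log_lik_N)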
Example #50
import autograd.numpy as np
from autograd import value_and_grad
from scipy.optimize import minimize

def rosenbrock(x):
    return 100*(x[1] - x[0]**2)**2 + (1 - x[0])**2

# Build a function that also returns gradients using autograd.
rosenbrock_with_grad = value_and_grad(rosenbrock)

# Optimize using conjugate gradients.
result = minimize(rosenbrock_with_grad, x0=np.array([0.0, 0.0]), jac=True, method='CG')
print "Found minimum at {0}".format(result.x)
    def train(self):
        result = minimize(value_and_grad(self.likelihood), self.hyp, jac=True,
                          method='L-BFGS-B', callback=self.callback)
        self.hyp = result.x
fn_out = dir_out + fn


def gmm_objective_wrapper(params, x, wishart_gamma, wishart_m):
    return gmm.gmm_objective(params[0], params[1], params[2], x, wishart_gamma,
                             wishart_m)


alphas, means, icf, x, wishart_gamma, wishart_m = gmm_io.read_gmm_instance(
    fn_in + ".txt", replicate_point)

tf = utils.timer(gmm.gmm_objective,
                 (alphas, means, icf, x, wishart_gamma, wishart_m),
                 nruns=nruns_f,
                 limit=time_limit)

name = "Autograd"
if nruns_J > 0:
    # k = alphas.size
    grad_gmm_objective_wrapper = value_and_grad(gmm_objective_wrapper)
    tJ, grad = utils.timer(grad_gmm_objective_wrapper,
                           ((alphas, means, icf), x, wishart_gamma, wishart_m),
                           nruns=nruns_J,
                           limit=time_limit,
                           ret_val=True)
    gmm_io.write_J(fn_out + "_J_" + name + ".txt", grad[1])
else:
    tJ = 0

utils.write_times(fn_out + "_times_" + name + ".txt", tf, tJ)
Example #53
    frate = F(F0, rebonato_vol, var, tj)
    return frate


# for a specific combination of S, T, K
v0 = 1.0
F0 = pd.read_pickle("start_rate.pkl")

z = np.random.rand(120)


def f(params):
    a, b, c, theta, kappa, epsilon, rho = params
    estimated = []
    for j in range(30):
        fi = f_rate(F0[j], v0, a, b, c, rho, kappa, theta, epsilon, betas[j],
                    z, z1, z2, j)
        estimated.append(fi)
    diff = target - estimated
    return np.dot(diff, diff)


iters = 100
## dy is a function that will return
# 1. the scalar calibration objective (sum of squared differences between market and estimated values)
# 2. its gradient w.r.t. each parameter
dy = value_and_grad(f)
init_guess = np.array([0.15, 0.015, 0.015, 0.015, 0.015, 0.015, 0.5])
# begin optimization
sol = root(dy, init_guess, jac=True, method='lm', options={"maxiter": iters})
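# Because the (fixed) objective above returns a scalar sum of squared
# differences, its value_and_grad pairs naturally with a gradient-based
# minimizer via jac=True. A self-contained sketch of that pattern follows;
# the toy quadratic model and synthetic target are assumptions, not the
# calibration problem above.
import autograd.numpy as np
from autograd import value_and_grad
from scipy.optimize import minimize

target_toy = np.array([1.0, 4.0, 9.0])

def toy_objective(params):
    a, b = params[0], params[1]
    estimated = a * np.arange(1, 4) ** 2 + b
    diff = target_toy - estimated
    return np.dot(diff, diff)

sol_toy = minimize(value_and_grad(toy_objective), np.array([0.5, 0.0]),
                   jac=True, method='L-BFGS-B', options={"maxiter": 100})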
Example #54
    pred_fun, loss_fun, frac_err, num_weights = build_lstm(input_size, state_size, output_size)

    def print_training_prediction(weights, train_inputs, train_targets):
        print("Training text                         Predicted text")
        logprobs = np.asarray(pred_fun(weights, train_inputs))
        for t in range(logprobs.shape[1]):
            training_text  = one_hot_to_string(train_targets[:,t,:])
            predicted_text = one_hot_to_string(logprobs[:,t,:])
            print(training_text.replace('\n', ' ') + "| " + predicted_text.replace('\n', ' '))

    def callback(weights):
        print("Train loss:", loss_fun(weights, train_inputs, train_targets))
        print_training_prediction(weights, train_inputs, train_targets)

    # Build gradient of loss function using autograd.
    loss_and_grad = value_and_grad(loss_fun)

    # Wrap function to only have one argument, for scipy.minimize.
    def training_loss_and_grad(weights):
        return loss_and_grad(weights, train_inputs, train_targets)

    init_weights = npr.randn(num_weights) * param_scale
    # Check the gradients numerically, just to be safe
    quick_grad_check(loss_fun, init_weights, (train_inputs, train_targets))

    print("Training LSTM...")
    result = minimize(training_loss_and_grad, init_weights, jac=True, method='CG',
                      options={'maxiter':train_iters}, callback=callback)
    trained_weights = result.x

    print("\nGenerating text from LSTM model...")
Example #55
if __name__ == '__main__':
    # Network parameters
    input_size, h1_size, h2_size, output_size = 14*14, 200, 80, 10
    print input_size, h1_size, h2_size, output_size

    # Training parameters
    param_scale = 0.1
    learning_rate = 0.1 / img_all_num
    momentum = 0.9
    batch_size = 512
    num_epochs = 5000

    # training function & backward gradient
    calc_loss, num_weights, p_o_b, accuracy, parser = \
        rnn_for_mnist(input_size, h1_size, h2_size, output_size)
    loss_and_grad = value_and_grad(calc_loss, argnum=1)

    # set batches index
    def make_batches(img_all_num, batch_size):
        return [ slice(i, min(i+batch_size, img_all_num)) 
                for i in range(0, img_all_num, batch_size) ]
    batch_idxs = make_batches( train_images.shape[1] , batch_size )

    # init random weights
    rs = npr.RandomState()
    weights = rs.randn(num_weights) * param_scale
    
    # init backward gradient matrix
    weights_back = np.zeros(num_weights)

Example #56
    fig = plt.figure(figsize=(20, 8), facecolor="white")
    ax_large = fig.add_subplot(121, frameon=False)
    ax_small = fig.add_subplot(122, frameon=False)
    plt.show(block=False)

    axes_set = [False, True]  # False: full ("Far") view, True: close-up view

    init_params = 0.1 * rs.randn(total_num_params)
    deep_map = create_deep_map(init_params)
    init_params = initialize(deep_map, X, num_pseudo_params)

    print("Optimizing covariance parameters...")
    objective = lambda params: -log_likelihood(params, X, y, n_samples)

    params = minimize(
        value_and_grad(objective), init_params, jac=True, method="BFGS", callback=callback, options={"maxiter": 200}
    )

    params = params["x"]
    plot_xs = np.reshape(np.linspace(-5, 5, 300), (300, 1))

    deep_map = create_deep_map(params)
    for axes in axes_set:
        if axes:
            ax = ax_small
            title = "Close up"
        else:
            ax = ax_large
            title = "Far"
        plot_deep_gp(ax, params, plot_xs)
        ax.plot(np.ndarray.flatten(deep_map[0][0]["x0"]), deep_map[0][0]["y0"], "ro")