Example #1
def test_assignment():
    def f(y):
        x = dict(a=1)
        return assignment(y, x)
    y = np.array([1.0, 2.0, 3.0])
    quick_grad_check(f, y, verbose=False)
    return
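For context, quick_grad_check (from the old autograd.util module) compares the analytic gradient against a finite-difference estimate along a single random direction, which is why it is cheap enough to call inline like this. A minimal sketch of that idea, written here from scratch; the helper name and tolerances are illustrative, not autograd's exact implementation:

import autograd.numpy as np
import numpy.random as npr
from autograd import grad

def directional_grad_check(fun, arg, extra_args=(), eps=1e-4, rtol=1e-4):
    """Compare a central finite-difference directional derivative of `fun`
    at `arg` against the analytic one from autograd, along one random direction."""
    direction = npr.randn(*np.shape(arg))
    numeric = (fun(arg + eps * direction, *extra_args)
               - fun(arg - eps * direction, *extra_args)) / (2 * eps)
    analytic = np.sum(grad(fun)(arg, *extra_args) * direction)
    assert np.isclose(numeric, analytic, rtol=rtol), (numeric, analytic)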
Example #2
def train_nn(train_images, train_labels, test_images, test_labels):

    # Make neural net functions
    N_weights, predict_fun, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)
    loss_grad = grad(loss_fun)

    # Initialize weights
    rs = npr.RandomState()
    weights = rs.randn(N_weights) * param_scale

    # Check the gradients numerically, just to be safe
    quick_grad_check(loss_fun, weights, (train_images, train_labels))

    print "    Epoch      |    Train err  |   Test err  "

    def print_perf(epoch, weights):
        test_perf  = frac_err(weights, test_images, test_labels)
        train_perf = frac_err(weights, train_images, train_labels)
        print "{0:15}|{1:15}|{2:15}".format(epoch, train_perf, test_perf)

    # Train with sgd
    batch_idxs = make_batches(train_images.shape[0], batch_size)
    cur_dir = np.zeros(N_weights)

    for epoch in range(num_epochs):
        print_perf(epoch, weights)
        for idxs in batch_idxs:
            grad_W = loss_grad(weights, train_images[idxs], train_labels[idxs])
            cur_dir = momentum * cur_dir + (1.0 - momentum) * grad_W
            weights -= learning_rate * cur_dir

    return weights
Example #3
def test_assignment():
    def f(y):
        x = dict(a=1)
        return assignment(y, x)

    y = np.array([1.0, 2.0, 3.0])
    quick_grad_check(f, y, verbose=False)
    return
Example #4
def create_model(args, inputs, targets):
    if args.classifier == 'logreg':
        mdl = model.create_logreg_model(args, inputs, targets)
    elif args.classifier == 'fullconn':
        mdl = model.create_fully_connected_model(args, inputs, targets)
    else:
        raise Exception('Unknown classifier type {}'.format(args.classifier))

    quick_grad_check(mdl.loss, mdl.params_flat, verbose=False)
    return mdl
Example #5
def make_openmm_system_autograd_compatible(topology, system, initial_positions, temperature=298 * unit.kelvin):
    """Given the specification of an openmm system and a bath temperature, and return a flattened function
    that computes the unnormalized log Gibbs density, and also defines its gradient for compatibility with autograd.
    """

    # Can also compute these on the GPU eventually.
    platform = mm.Platform.getPlatformByName("Reference")

    ghmc = GHMCIntegrator(temperature=temperature)

    beta = 1.0 / (temperature * kB)
    sim = app.Simulation(topology, system, ghmc, platform)
    sim.context.setPositions(initial_positions)

    n_atoms = len(initial_positions)

    def unflatten(x):
        """Given an (n_atoms * 3,)array, unpack into an (n_atoms, 3) array"""
        xyz = x.reshape((n_atoms, 3))
        return xyz

    @primitive
    def flat_log_q(x):
        """Use OpenMM to compute minus the reduced potential at x."""
        sim.context.setPositions(unflatten(x))
        return - sim.context.getState(getEnergy=True).getPotentialEnergy() * beta

    def grad_flat_log_q(x):
        """Use OpenMM to compute minus the gradient of the reduced potential at x."""
        sim.context.setPositions(unflatten(x))
        g = (sim.context.getState(getForces=True).getForces(asNumpy=True) * beta)
        return g.value_in_unit(g.unit).flatten()

    def make_grad_flat_log_q(ans, x):
        def gradient(g):
            return grad_flat_log_q(x)

        return gradient

    flat_log_q.defgrad(make_grad_flat_log_q)

    flat_pos = initial_positions.value_in_unit(initial_positions.unit).flatten()
    quick_grad_check(flat_log_q, flat_pos)

    return flat_log_q
Example #6
def test_deprecated_quick_grad_check_wrapper():
    from autograd.util import quick_grad_check
    with warnings.catch_warnings(record=True) as w:
        quick_grad_check(lambda x, y: x**2 + y, 1., (2., ))
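The test above only verifies that the deprecated autograd.util wrapper still emits a warning. In the autograd versions where quick_grad_check is deprecated, the maintained checker is check_grads in autograd.test_util; a sketch of the equivalent check on the same function, assuming the check_grads(fun, modes, order)(*args) calling convention of autograd 1.2+:

from autograd.test_util import check_grads

def f(x, y):
    return x**2 + y

# Reverse-mode, first-order check at the same point the test above uses.
check_grads(f, modes=['rev'], order=1)(1., 2.)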
Example #7
            print training_text.replace('\n', ' ') + "| " + predicted_text.replace('\n', ' ')

    def callback(weights):
        print "Train loss:", loss_fun(weights, train_inputs, train_targets)
        print_training_prediction(weights, train_inputs, train_targets)

    # Build gradient of loss function using autograd.
    loss_and_grad = grad(loss_fun, return_function_value=True)

    # Wrap function to only have one argument, for scipy.minimize.
    def training_loss_and_grad(weights):
        return loss_and_grad(weights, train_inputs, train_targets)

    init_weights = npr.randn(num_weights) * param_scale
    # Check the gradients numerically, just to be safe
    quick_grad_check(loss_fun, init_weights, (train_inputs, train_targets))

    print "Training RNN..."
    result = minimize(training_loss_and_grad, init_weights, jac=True, method='CG',
                      options={'maxiter':train_iters}, callback=callback)
    trained_weights = result.x

    print
    print "Generating text from RNN..."
    num_letters = 30
    for t in xrange(20):
        text = " "
        for i in xrange(num_letters):
            seqs = string_to_one_hot(text, output_size)[:, np.newaxis, :]
            logprobs = pred_fun(trained_weights, seqs)[-1].ravel()
            text += chr(npr.choice(len(logprobs), p=np.exp(logprobs)))
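A note on the gradient construction above: the return_function_value=True flag on grad comes from an older autograd API; later releases expose the same loss-plus-gradient pair as value_and_grad, which Example #18 below uses. A self-contained sketch of the swap with a hypothetical stand-in loss (toy_loss is illustrative, not the loss_fun from this example):

import autograd.numpy as np
from autograd import value_and_grad

def toy_loss(weights, inputs, targets):
    return np.sum((np.dot(inputs, weights) - targets) ** 2)

loss_and_grad = value_and_grad(toy_loss)   # returns (value, gradient) in one call
value, gradient = loss_and_grad(np.zeros(3), np.eye(3), np.ones(3))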
Example #8
    batch_size = 256
    num_epochs = 50

    # Load and process MNIST data (borrowing from Kayak)
    N_data, train_images, train_labels, test_images, test_labels = load_mnist()

    # Make neural net functions
    N_weights, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)
    loss_grad = grad(loss_fun)

    # Initialize weights
    rs = npr.RandomState()
    W = rs.randn(N_weights) * param_scale

    # Check the gradients numerically, just to be safe
    quick_grad_check(loss_fun, W, (train_images, train_labels))

    print("    Epoch      |    Train err  |   Test err  ")

    def print_perf(epoch, W):
        test_perf = frac_err(W, test_images, test_labels)
        train_perf = frac_err(W, train_images, train_labels)
        print("{0:15}|{1:15}|{2:15}".format(epoch, train_perf, test_perf))

    # Train with sgd
    batch_idxs = make_batches(train_images.shape[0], batch_size)
    cur_dir = np.zeros(N_weights)

    for epoch in range(num_epochs):
        print_perf(epoch, W)
        for idxs in batch_idxs:
Example #9
def test_likelihood_gradient():
    quick_grad_check(logprob_two_moons, np.atleast_2d(np.array([0.1, 0.2]).T))
Example #10
# on both the input to the original function (x), and the output of the
# original function (ans).
def make_grad_logsumexp(ans, x):
    # If you want to be able to take higher-order derivatives, then all the
    # code inside this function must be itself differentiable by autograd.
    def gradient_product(g):
        # This closure multiplies g with the Jacobian of logsumexp (d_ans/d_x).
        # Because autograd uses reverse-mode differentiation, g contains
        # the gradient of the objective w.r.t. ans, the output of logsumexp.
        return np.full(x.shape, g) * np.exp(x - np.full(x.shape, ans))

    return gradient_product


# Now we tell autograd that logsumexp has a gradient-making function.
logsumexp.defgrad(make_grad_logsumexp)

if __name__ == '__main__':
    # Now we can use logsumexp() inside a larger function that we want
    # to differentiate.
    def example_func(y):
        z = y**2
        lse = logsumexp(z)
        return np.sum(lse)

    grad_of_example = grad(example_func)
    print("Gradient: ", grad_of_example(npr.randn(10)))

    # Check the gradients numerically, just to be safe.
    quick_grad_check(example_func, npr.randn(10))
Example #11
# The reason for the closure is so that the gradient can depend
# on both the input to the original function (x), and the output of the
# original function (ans).
def make_grad_logsumexp(ans, x):
    # If you want to be able to take higher-order derivatives, then all the
    # code inside this function must be itself differentiable by autograd.
    def gradient_product(g):
        # This closure multiplies g with the Jacobian of logsumexp (d_ans/d_x).
        # Because autograd uses reverse-mode differentiation, g contains
        # the gradient of the objective w.r.t. ans, the output of logsumexp.
        return np.full(x.shape, g) * np.exp(x - np.full(x.shape, ans))
    return gradient_product

# Now we tell autograd that logsumexp has a gradient-making function.
logsumexp.defgrad(make_grad_logsumexp)


if __name__ == '__main__':
    # Now we can use logsumexp() inside a larger function that we want
    # to differentiate.
    def example_func(y):
        z = y**2
        lse = logsumexp(z)
        return np.sum(lse)

    grad_of_example = grad(example_func)
    print("Gradient: ", grad_of_example(npr.randn(10)))

    # Check the gradients numerically, just to be safe.
    quick_grad_check(example_func, npr.randn(10))
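Examples #10 and #11 use the old defgrad registration API. Later autograd releases replaced it with defvjp from autograd.extend, keeping the same ans/x closure structure. A sketch of the equivalent registration, with logsumexp redefined locally so the snippet stands on its own:

import autograd.numpy as np
import autograd.numpy.random as npr
from autograd import grad
from autograd.extend import primitive, defvjp

@primitive
def logsumexp(x):
    """Numerically stable log(sum(exp(x)))."""
    max_x = np.max(x)
    return max_x + np.log(np.sum(np.exp(x - max_x)))

# Same Jacobian-vector product as make_grad_logsumexp above, in vjp-maker form.
defvjp(logsumexp,
       lambda ans, x: lambda g: np.full(x.shape, g) * np.exp(x - np.full(x.shape, ans)))

print(grad(lambda y: logsumexp(y ** 2))(npr.randn(10)))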
Example #12

#Compute the gradient w.r.t the first input variable x1
g_x1_f = grad(f, 0)
#Compute the gradient w.r.t the second input variable x2
g_x2_f = grad(f, 1)
#Evaluate and print the value of the function at x1=1, x2=2
print(f(1, 2))
#Produces 2.23
#Evaluate and print the value of the gradient w.r.t x1 at x1=1, x2=2
print(g_x1_f(1, 2))
#Produces 0.44
#Evaluate and print the value of the gradient w.r.t x2 at x1=1, x2=2
print(g_x2_f(1, 2))
#Produces 0.89

from autograd.util import quick_grad_check


#Define the function
def f(x1, x2):
    return numpy.sqrt(x1 * x1 + x2 * x2)


#Computes and checks the gradient for the given values
quick_grad_check(f, 1.0, extra_args=[2.0])
#Output
#
#Checking gradient of <function f at 0x10504bed8> at 1.0
#Gradient projection OK
#(numeric grad: 0.447213595409, analytic grad: 0.
Example #13
#Wrapper Around Numpy
import autograd.numpy as numpy
#Function to generate gradients
from autograd import grad
#Define the function
def f(x1, x2): return numpy.sqrt(x1 * x1 + x2 * x2)
#Compute the gradient w.r.t the first input variable x1
g_x1_f = grad(f,0)
#Compute the gradient w.r.t the second input variable x2
g_x2_f = grad(f,1)
#Evaluate and print the value of the function at x1=1, x2=2
print( f(1,2) )
#Produces 2.23
#Evaluate and print the value of the gradient w.r.t x1 at x1=1, x2=2
print( g_x1_f(1,2) )
#Produces 0.44
#Evaluate and print the value of the gradient w.r.t x2 at x1=1, x2=2
print( g_x2_f(1,2) )
#Produces 0.89


from autograd.util import quick_grad_check
#Define the function
def f(x1, x2): return numpy.sqrt(x1 * x1 + x2 * x2)
#Computes and checks the gradient for the given values
quick_grad_check(f,1.0,extra_args=[2.0])
#Output
#
#Checking gradient of <function f at 0x10504bed8> at 1.0
#Gradient projection OK
#(numeric grad: 0.447213595409, analytic grad: 0.
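The approximate values quoted in the comments follow from the closed-form gradient: for f(x1, x2) = sqrt(x1^2 + x2^2), the partials are df/dx1 = x1 / f and df/dx2 = x2 / f, so at (1, 2) the function value is sqrt(5) ≈ 2.236 and the partials are 1/sqrt(5) ≈ 0.4472 and 2/sqrt(5) ≈ 0.8944, matching the numeric grad 0.447213595409 that quick_grad_check prints above. A quick plain-NumPy confirmation:

import numpy

x1, x2 = 1.0, 2.0
f_val = numpy.sqrt(x1 ** 2 + x2 ** 2)   # sqrt(5)    ~ 2.2360679...
df_dx1 = x1 / f_val                     # 1/sqrt(5)  ~ 0.4472135954
df_dx2 = x2 / f_val                     # 2/sqrt(5)  ~ 0.8944271910
print(f_val, df_dx1, df_dx2)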
Example #14
def test_deprecated_quick_grad_check_wrapper():
    from autograd.util import quick_grad_check
    with warnings.catch_warnings(record=True) as w:
        quick_grad_check(lambda x, y: x**2 + y, 1., (2.,))
Example #15
    train_images = partial_flatten(train_images) / 255.0
    test_images  = partial_flatten(test_images)  / 255.0
    train_labels = one_hot(train_labels, 10)
    test_labels = one_hot(test_labels, 10)
    N_data = train_images.shape[0]

    # Make neural net functions
    N_weights, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)
    loss_grad = grad(loss_fun)

    # Initialize weights
    rs = npr.RandomState()
    W = rs.randn(N_weights) * param_scale

    # Check the gradients numerically, just to be safe
    quick_grad_check(loss_fun, W, (train_images, train_labels))

    print "    Epoch      |    Train err  |   Test error  "
    def print_perf(epoch, W):
        test_perf  = frac_err(W, test_images, test_labels)
        train_perf = frac_err(W, train_images, train_labels)
        print "{0:15}|{1:15}|{2:15}".format(epoch, train_perf, test_perf)

    # Train with sgd
    batch_idxs = make_batches(N_data, batch_size)
    cur_dir = np.zeros(N_weights)

    for epoch in range(num_epochs):
        print_perf(epoch, W)
        for idxs in batch_idxs:
            grad_W = loss_grad(W, train_images[idxs], train_labels[idxs])
Example #16
def run_aevb(train_images):
    start_time = time.time()

    # Create aevb function
    # Training parameters

    D = train_images.shape[1]

    enc_layers = [D, hidden_units, 2*latent_dimensions]
    dec_layers = [latent_dimensions, hidden_units, D]

    N_weights_enc, encoder, encoder_log_like = make_gaussian_nn(enc_layers)
    N_weights_dec, decoder, decoder_log_like = make_binary_nn(dec_layers)

    # Optimize aevb
    batch_size = 100
    num_training_iters = 1600
    rs = npr.RandomState(0)
    num_steps = 0

    init_enc_w = rs.randn(N_weights_enc) * param_scale
    init_dec_w = rs.randn(N_weights_dec) * param_scale
    init_output_weights = 0.1*rs.randn(num_steps, latent_dimensions)
    init_transform_weights = 0.1*rs.randn(num_steps, latent_dimensions)
    init_biases = 0.1*rs.randn(num_steps)

    flow_sample, parser = build_flow_sampler_with_inputs(latent_dimensions, num_steps)
    parser.add_shape('encoder weights',len(init_enc_w))
    parser.add_shape('decoder weights',len(init_dec_w))

    sampler_params = np.zeros(len(parser))
    parser.put(sampler_params, 'output weights', init_output_weights)
    parser.put(sampler_params, 'transform weights', init_transform_weights)
    parser.put(sampler_params, 'biases', init_biases)
    parser.put(sampler_params,'encoder weights', init_enc_w)
    parser.put(sampler_params,'decoder weights', init_dec_w)
    batch_idxs = make_batches(train_images.shape[0], batch_size)

    log_prior = build_logprob_standard_normal(latent_dimensions)
    def batch_value_and_grad(weights, iter):
        iter = iter % len(batch_idxs)
        cur_data = train_images[batch_idxs[iter]]
        return lower_bound(weights, parser, flow_sample, encoder, decoder_log_like, log_prior,
                           N_weights_enc, cur_data, samples_per_image, latent_dimensions, rs)
    lb_grad = grad(batch_value_and_grad)

    def lb_grad_check(weights):
        return batch_value_and_grad(weights, 0)
    quick_grad_check(lb_grad_check, sampler_params)
    print 'checked!'
    kill  # undefined name; raises NameError and stops the script right after the gradient check (debugging leftover)

    def callback(params, i, grad):
        ml = batch_value_and_grad(params,i)
        print "log marginal likelihood:", ml
        #Generate samples
        num_samples = 100
        images_per_row = 10
        zs = rs.randn(num_samples,latent_dimensions)
        samples = decoder(parser.get(params, 'decoder weights'), zs)
        # samples = np.random.binomial(1,decoder(parser.get(params, 'decoding weights'), zs))

        fig = plt.figure(1)
        fig.clf()
        ax = fig.add_subplot(111)
        plot_images(samples, ax, ims_per_row=images_per_row)
        plt.savefig('samples.png')

    final_params = adam(lb_grad, sampler_params, num_training_iters, callback=callback)

    def decoder_with_weights(zs):
        return decoder(parser.get(final_params, 'decoder weights'), zs)

    finish_time = time.time()
    print "total runtime", finish_time - start_time

    return decoder_with_weights
Example #17
        # Hmc should be greater than Hbound
        assert Hmc_hi > Hbound, "bound isn't lower ya dope (%2.3f not greater than %2.3f)" % (
            Hmc_hi, Hbound)

    print "Gap percentiles [1, 50, 99] %s" % str(
        np.percentile(gaps, [1, 50, 99]))

    #########################################
    # test per mu_n function and gradient   #
    #########################################
    n = 0
    lbn, lbs = make_lower_bound_MoGn(theta, n, s2min=1e-7)
    thn = theta[n, :D]
    assert np.isclose(lower_bound_MoG(theta), lbn(thn)), "per n is bad"
    from autograd.util import quick_grad_check, nd
    quick_grad_check(lbn, thn)

    print "Hessiandiag, numeric hessian diag"
    hlbn = hessian(lbn)
    print np.diag(hlbn(thn))

    hdiag = numeric_hessian_diag(lbn, thn)
    print hdiag

    #####################################
    # Test NVPI on a small, 2d example  #
    #####################################
    from vbproj.vboost import mog
    means = np.array([[1., 1.], [-1., -1.], [-1, 1]])
    covs = np.array([2 * np.eye(2), 1 * np.eye(2), 1 * np.eye(2)])
    icovs = np.array([np.linalg.inv(c) for c in covs])
Example #18
            print(training_text.replace('\n', ' ') + "|" + predicted_text.replace('\n', ' '))

    # Wrap function to only have one argument, for scipy.minimize.
    def training_loss(weights):
        return -loglike_fun(weights, train_inputs, train_inputs)

    def callback(weights):
        print("Train loss:", training_loss(weights))
        print_training_prediction(weights)

    # Build gradient of loss function using autograd.
    training_loss_and_grad = value_and_grad(training_loss)

    init_weights = npr.randn(num_weights) * param_scale
    # Check the gradients numerically, just to be safe
    quick_grad_check(training_loss, init_weights)

    print("Training LSTM...")
    result = minimize(training_loss_and_grad, init_weights, jac=True, method='CG',
                      options={'maxiter':train_iters}, callback=callback)
    trained_weights = result.x

    print("\nGenerating text from LSTM model...")
    num_letters = 30
    for t in range(20):
        text = ""
        for i in range(num_letters):
            seqs = string_to_one_hot(text, output_size)[:, np.newaxis, :]
            logprobs = pred_fun(trained_weights, seqs)[-1].ravel()
            text += chr(npr.choice(len(logprobs), p=np.exp(logprobs)))
        print(text)
Example #19
    def callback(weights):
        print "Train loss:", loss_fun(weights, train_inputs, train_targets)
        print_training_prediction(weights, train_inputs, train_targets)

    # Build gradient of loss function using autograd.
    loss_and_grad = grad(loss_fun, return_function_value=True)

    # Wrap function to only have one argument, for scipy.minimize.
    def training_loss_and_grad(weights):
        return loss_and_grad(weights, train_inputs, train_targets)

    init_weights = npr.randn(num_weights) * param_scale
    # Check the gradients numerically, just to be safe
    quick_grad_check(loss_fun, init_weights, (train_inputs, train_targets))

    print "Training RNN..."
    result = minimize(training_loss_and_grad,
                      init_weights,
                      jac=True,
                      method='CG',
                      options={'maxiter': train_iters},
                      callback=callback)
    trained_weights = result.x

    print
    print "Generating text from RNN..."
    num_letters = 30
    for t in xrange(20):
        text = " "
Example #20
    # Make neural net functions
    N_weights, pred_fun, loss_fun, frac_err = \
        cv.make_nn_funs(input_shape, layer_specs, L2_reg)
    loss_grad = grad(loss_fun)

    # Initialize weights
    rs = npr.RandomState()
    W = rs.randn(N_weights) * param_scale

    print loss_fun(W, train_images[:10], samps[:10])
    print
    # IPython line_profiler magic; not valid plain Python outside a notebook:
    # %lprun -m conv_net loss_grad(W, train_images[:10], samps[:10])

    # Check the gradients numerically, just to be safe
    quick_grad_check(loss_fun, W, (train_images[:10], samps[:10]))

    ########################################################################
    # Plotting/Printing Funcs
    ########################################################################
    print("    Epoch      |    Train err  |   Best error ")
    best_w = W.copy()
    best_loss = np.inf
    def callback(x, i, g):
        train_perf = loss_fun(x, train_images, samps)
        global best_loss
        global best_w
        if train_perf < best_loss:
            best_w = x.copy()
            best_loss = train_perf
        print("{0:15}|{1:15}|{2:15}".format(i, "%2.5g"%train_perf, best_loss))
Example #21
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))

# Build a toy dataset.
inputs = np.array([[0.52, 1.12,  0.77],
                   [0.88, -1.08, 0.15],
                   [0.52, 0.06, -1.30],
                   [0.74, -2.49, 1.39]])
targets = np.array([True, True, False, True])

# Build a function that returns gradients of training loss using autograd.
cost_grad = grad(cost)

# Check the gradients numerically, just to be safe.
weights = np.array([0.0, 0.0, 0.0])
quick_grad_check(cost, weights)

# Optimize weights using gradient descent.
print "Initial loss:", cost(weights)
momentum = 0
for i in xrange(1000):
    # print cost_grad(weights)
    momentum = cost_grad(weights) + momentum*0.8
    weights -= momentum
    # print cost(weights)

print  "Trained loss:", cost(weights)
print weights

weights = np.array([0.0, 0.0, 0.0])
[x, f, d] = lbfgs(func=cost, x0=weights, fprime=cost_grad)
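The lbfgs helper called above is not defined in the excerpt; its keyword arguments (func, x0, fprime) and three return values match scipy.optimize.fmin_l_bfgs_b, so the missing import was presumably along these lines (an assumption, not shown in the source):

# Assumed import for the `lbfgs` name used above. fmin_l_bfgs_b returns
# (x_opt, f_opt, info_dict), which matches the [x, f, d] unpacking.
from scipy.optimize import fmin_l_bfgs_b as lbfgs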
Example #22
    # Wrap function to only have one argument, for scipy.minimize.
    def training_loss(weights):
        return -loglike_fun(weights, train_inputs, train_inputs)

    def callback(weights):
        print("Train loss:", training_loss(weights))
        print_training_prediction(weights)

    # Build gradient of loss function using autograd.
    training_loss_and_grad = value_and_grad(training_loss)

    init_weights = npr.randn(num_weights) * param_scale
    # Check the gradients numerically, just to be safe
    quick_grad_check(training_loss, init_weights)

    print("Training LSTM...")
    result = minimize(training_loss_and_grad,
                      init_weights,
                      jac=True,
                      method='CG',
                      options={'maxiter': train_iters},
                      callback=callback)
    trained_weights = result.x

    print("\nGenerating text from LSTM model...")
    num_letters = 30
    for t in range(20):
        text = ""
        for i in range(num_letters):
Example #23
    preds = sigmoid(np.dot(inputs, weights))
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))


# Build a toy dataset.
inputs = np.array([[0.52, 1.12, 0.77], [0.88, -1.08, 0.15],
                   [0.52, 0.06, -1.30], [0.74, -2.49, 1.39]])
targets = np.array([True, True, False, True])

# Build a function that returns gradients of training loss using autograd.
cost_grad = grad(cost)

# Check the gradients numerically, just to be safe.
weights = np.array([0.0, 0.0, 0.0])
quick_grad_check(cost, weights)

# Optimize weights using gradient descent.
print "Initial loss:", cost(weights)
momentum = 0
for i in xrange(1000):
    # print cost_grad(weights)
    momentum = cost_grad(weights) + momentum * 0.8
    weights -= momentum
    # print cost(weights)

print "Trained loss:", cost(weights)
print weights

weights = np.array([0.0, 0.0, 0.0])
[x, f, d] = lbfgs(func=cost, x0=weights, fprime=cost_grad)