import pickle
import pstats
import time
from cProfile import Profile

import numpy as np
import pycuda.driver
from pycuda import gpuarray

import hessianfree as hf


def mnist(model_args=None, run_args=None):
    """Test on the MNIST (digit classification) dataset."""

    # download dataset at http://deeplearning.net/data/mnist/mnist.pkl.gz
    with open("mnist.pkl", "rb") as f:
        train, _, test = pickle.load(f)

    if model_args is None:
        ff = hf.FFNet([28 * 28, 1024, 512, 256, 32, 10],
                      layers=([hf.nl.Linear()] + [hf.nl.ReLU()] * 4 +
                              [hf.nl.Softmax()]),
                      use_GPU=True, debug=False)
    else:
        ff = hf.FFNet([28 * 28, 1024, 512, 256, 32, 10],
                      layers=([hf.nl.Linear()] + [hf.nl.ReLU()] * 4 +
                              [hf.nl.Softmax()]),
                      **model_args)

    inputs = train[0]
    # soft targets: 0.91 for the correct digit, 0.01 for the others
    targets = np.zeros((inputs.shape[0], 10), dtype=np.float32)
    targets[np.arange(inputs.shape[0]), train[1]] = 0.9
    targets += 0.01

    tmp = np.zeros((test[0].shape[0], 10), dtype=np.float32)
    tmp[np.arange(test[0].shape[0]), test[1]] = 0.9
    tmp += 0.01
    test = (test[0], tmp)

    if run_args is None:
        ff.run_batches(inputs, targets,
                       optimizer=hf.opt.HessianFree(CG_iter=250,
                                                    init_damping=45),
                       batch_size=7500, test=test, max_epochs=1000,
                       test_err=hf.loss_funcs.ClassificationError(),
                       plotting=True)
    else:
        CG_iter = run_args.pop("CG_iter", 250)
        init_damping = run_args.pop("init_damping", 45)
        ff.run_batches(inputs, targets,
                       optimizer=hf.opt.HessianFree(CG_iter, init_damping),
                       test=test,
                       test_err=hf.loss_funcs.ClassificationError(),
                       **run_args)

    output = ff.forward(test[0], ff.W)
    print("classification error",
          hf.loss_funcs.ClassificationError().batch_loss(output, test[1]))
def threshold_calc_G():
    """Compare GPU vs CPU performance on feedforward curvature calculation.

    This can be used to determine whether it is better to run some target
    network on the CPU or GPU."""

    batch_size = list(range(256, 1025, 256))
    layer_size = [1] + list(range(64, 513, 64))
    reps = 100

    times = np.zeros((len(batch_size), len(layer_size), 2))
    for i, b in enumerate(batch_size):
        inputs = np.random.randn(b, 1).astype(np.float32)
        targets = np.random.randn(b, 1).astype(np.float32)

        for j, n in enumerate(layer_size):
            # time the CPU implementation
            ff = hf.FFNet([1, n, n, 1], use_GPU=False)
            ff.cache_minibatch(inputs, targets)
            v = np.random.randn(ff.W.size).astype(np.float32)

            for _ in range(5):
                # warm up to get rid of any startup overhead
                ff.calc_G(v)

            start = time.time()
            for _ in range(reps):
                ff.calc_G(v)
            times[i, j, 0] = time.time() - start

            # time the GPU implementation
            ff = hf.FFNet([1, n, n, 1], use_GPU=True)
            ff.cache_minibatch(inputs, targets)
            v = gpuarray.to_gpu(v)

            for _ in range(5):
                ff.GPU_calc_G(v)

            start = time.time()
            for _ in range(reps):
                ff.GPU_calc_G(v)
            v = v.get()  # copy back to host (also synchronizes the GPU)
            times[i, j, 1] = time.time() - start

            print("b", b, "n", n, "times", times[i, j])

    print(times[..., 1] - times[..., 0])

    print("batch size (%s) vs layer size (%s)" % (batch_size, layer_size))
    print("  (True indicates GPU is faster)")
    print(times[..., 1] < times[..., 0])
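# A hypothetical helper built on the timing sweep above (a sketch, not part
# of the library): given the `times`, `batch_size`, and `layer_size` values
# computed in threshold_calc_G, report the smallest layer size at which the
# GPU first beats the CPU for each batch size.
def gpu_threshold(times, batch_size, layer_size):
    for i, b in enumerate(batch_size):
        faster = times[i, :, 1] < times[i, :, 0]  # True where GPU won
        if faster.any():
            n = layer_size[np.argmax(faster)]  # first True entry
            print("batch size", b, ": GPU faster from layer size", n)
        else:
            print("batch size", b, ": CPU faster at all tested sizes")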
def test_ff_CG(use_GPU):
    rng = np.random.RandomState(0)
    inputs = rng.randn(100, 1).astype(np.float32)
    targets = rng.randn(100, 1).astype(np.float32)

    ff = hf.FFNet([1, 10, 1], debug=False, use_GPU=use_GPU, rng=rng)
    ff.optimizer = hf.opt.HessianFree()
    ff.cache_minibatch(inputs, targets)

    deltas = ff.optimizer.conjugate_gradient(
        np.zeros(ff.W.size, dtype=np.float32), ff.calc_grad(), iters=20,
        printing=False)

    assert deltas[0][0] == 3
    assert np.allclose(
        deltas[0][1],
        [-0.01693734, 0.00465961, 0.00173045, -0.00414165, -0.03843474,
         0.00636764, 0.01423731, -0.00433618, -0.00335347, 0.00935241,
         0.01242893, -0.00339621, -0.00137015, 0.00311182, 0.02883433,
         -0.00534688, -0.01032545, 0.00328636, 0.00244868, -0.00678817,
         -0.02461342, -0.02293827, -0.00737021, -0.01145663, -0.0116213,
         -0.03512985, -0.02004906, -0.02885171, -0.01596764, -0.02105034,
         -0.03943678],
        atol=1e-5)
def connections():
    """A network with non-standard connectivity between layers."""

    inputs = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
    targets = np.asarray([[0], [1], [1], [0]], dtype=np.float32)

    ff = hf.FFNet([2, 5, 5, 1], layers=hf.nl.Tanh(),
                  conns={0: [1, 2], 1: [2, 3], 2: [3]})

    ff.run_epochs(inputs, targets, optimizer=hf.opt.HessianFree(CG_iter=2),
                  max_epochs=40, plotting=True)

    outputs = ff.forward(inputs)[-1]
    for i in range(4):
        print("-" * 20)
        print("input", inputs[i])
        print("target", targets[i])
        print("output", outputs[i])
def crossentropy():
    """Example of a network using cross-entropy error."""

    inputs = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
    targets = np.asarray([[1, 0], [0, 1], [0, 1], [1, 0]], dtype=np.float32)

    ff = hf.FFNet([2, 5, 2],
                  layers=[hf.nl.Linear(), hf.nl.Tanh(), hf.nl.Softmax()],
                  loss_type=hf.loss_funcs.CrossEntropy())

    ff.run_batches(inputs, targets, optimizer=hf.opt.HessianFree(CG_iter=2),
                   max_epochs=40, plotting=True)

    # using gradient descent (for comparison)
    # ff.run_batches(inputs, targets, optimizer=hf.opt.SGD(l_rate=1),
    #                max_epochs=10000, plotting=True)

    outputs = ff.forward(inputs, ff.W)[-1]
    for i in range(4):
        print("-" * 20)
        print("input", inputs[i])
        print("target", targets[i])
        print("output", outputs[i])
def sparsity():
    """Example of a network with a loss function imposing sparsity on the
    neural activities."""

    inputs = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
    targets = np.asarray([[1, 0], [0, 1], [0, 1], [1, 0]], dtype=np.float32)

    ff = hf.FFNet([2, 8, 2],
                  layers=[hf.nl.Linear(), hf.nl.Logistic(), hf.nl.Softmax()],
                  loss_type=[hf.loss_funcs.CrossEntropy(),
                             hf.loss_funcs.SparseL1(0.1, target=0)])
    # TODO: change this to SparseL2

    ff.run_batches(inputs, targets, optimizer=hf.opt.HessianFree(CG_iter=10),
                   max_epochs=100, plotting=True)

    # using gradient descent (for comparison)
    # ff.run_batches(inputs, targets, optimizer=hf.opt.SGD(l_rate=1.0),
    #                max_epochs=10000, plotting=True)

    output = ff.forward(inputs, ff.W)
    for i in range(4):
        print("-" * 20)
        print("input", inputs[i])
        print("target", targets[i])
        print("output", output[-1][i])
        print("activity", np.mean(output[1][i]))
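# Per the TODO above, swapping in the L2 sparsity penalty (used in
# test_sparsity further down) would look like:
# loss_type=[hf.loss_funcs.CrossEntropy(),
#            hf.loss_funcs.SparseL2(0.01, target=0)]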
def test_testerr(use_GPU):
    inputs = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
    targets = np.asarray([[0, 1], [1, 0], [1, 0], [0, 1]], dtype=np.float32)

    ff = hf.FFNet([2, 5, 2],
                  layers=[hf.nl.Linear(), hf.nl.Tanh(), hf.nl.Softmax()],
                  debug=True, loss_type=hf.loss_funcs.CrossEntropy(),
                  use_GPU=use_GPU)

    err = hf.loss_funcs.ClassificationError()
    ff.run_batches(inputs, targets, optimizer=hf.opt.HessianFree(CG_iter=50),
                   max_epochs=100, test_err=err, target_err=-1,
                   print_period=None)

    outputs = ff.forward(inputs, ff.W)

    assert ff.loss.batch_loss(outputs, targets) < 1e-4
    print(outputs[-1])
    assert err.batch_loss(outputs, targets) == 0.0
def xor(use_hf=True):
    """Run a basic xor training test.

    :param bool use_hf: if True run example using Hessian-free optimization,
        otherwise use stochastic gradient descent
    """

    inputs = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
    targets = np.asarray([[0], [1], [1], [0]], dtype=np.float32)

    ff = hf.FFNet([2, 5, 1])

    if use_hf:
        ff.run_epochs(inputs, targets,
                      optimizer=hf.opt.HessianFree(CG_iter=2),
                      max_epochs=40, plotting=True)
    else:
        # using gradient descent (for comparison)
        ff.run_epochs(inputs, targets, optimizer=hf.opt.SGD(l_rate=1),
                      max_epochs=10000, plotting=True)

    outputs = ff.forward(inputs)[-1]
    for i in range(4):
        print("-" * 20)
        print("input", inputs[i])
        print("target", targets[i])
        print("output", outputs[i])
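# A minimal usage sketch for the xor demo above; both calls display training
# progress when plotting is available.
# xor(use_hf=True)   # Hessian-free optimization (40 epochs)
# xor(use_hf=False)  # stochastic gradient descent baseline (10000 epochs)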
def connections():
    """Example of a network with non-standard connectivity between layers."""

    inputs = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
    targets = np.asarray([[0], [1], [1], [0]], dtype=np.float32)

    ff = hf.FFNet([2, 5, 5, 1], layers=hf.nl.Tanh(),
                  conns={0: [1, 2], 1: [2, 3], 2: [3]})

    ff.run_batches(inputs, targets, optimizer=hf.opt.HessianFree(CG_iter=2),
                   max_epochs=40, plotting=True)

    # using gradient descent (for comparison)
    # ff.run_batches(inputs, targets, optimizer=hf.opt.SGD(l_rate=1),
    #                max_epochs=10000, plotting=True)

    outputs = ff.forward(inputs, ff.W)[-1]
    for i in range(4):
        print("-" * 20)
        print("input", inputs[i])
        print("target", targets[i])
        print("output", outputs[i])
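# The conns dict maps each layer index to the list of layers it feeds into.
# As a hypothetical variant, a skip connection from the input straight to
# the output layer (alongside the normal feedforward path) would be:
# ff = hf.FFNet([2, 5, 5, 1], layers=hf.nl.Tanh(),
#               conns={0: [1, 3], 1: [2], 2: [3]})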
def test_ff_calc_G(dtype):
    inputs = np.random.randn(1000, 1).astype(dtype)

    ff = hf.FFNet([1, 10, 1], debug=(dtype == np.float64), use_GPU=True)
    ff.cache_minibatch(inputs, inputs)

    v = np.random.randn(ff.W.size).astype(dtype)
    gpu_Gv = ff.GPU_calc_G(v)
    cpu_Gv = ff.calc_G(v)

    assert np.allclose(gpu_Gv, cpu_Gv, rtol=1e-4)
def test_stripped_batch(use_GPU):
    inputs = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
    targets = np.asarray([[0], [1], [1], [0]], dtype=np.float32)

    ff = hf.FFNet([2, 5, 1], debug=True, use_GPU=use_GPU)
    W_copy = ff.W.copy()

    ff.run_epochs(inputs, targets, optimizer=hf.opt.HessianFree(CG_iter=2),
                  max_epochs=20, print_period=None)

    # run the same optimization through the stripped-down per-epoch
    # interface, starting from the same initial weights
    ff2 = hf.FFNet([2, 5, 1], debug=True, use_GPU=use_GPU,
                   load_weights=W_copy)
    ff2.optimizer = hf.opt.HessianFree(CG_iter=2)
    for _ in range(20):
        ff2._run_epoch(inputs, targets)

    assert np.allclose(ff.forward(inputs)[-1], ff2.forward(inputs)[-1])
def test_SGD(use_GPU):
    inputs = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
    targets = np.asarray([[0], [1], [1], [0]], dtype=np.float32)

    ff = hf.FFNet([2, 5, 1], debug=False, use_GPU=use_GPU)

    ff.run_batches(inputs, targets, optimizer=hf.opt.SGD(l_rate=1),
                   max_epochs=10000, print_period=None)

    outputs = ff.forward(inputs, ff.W)

    assert ff.loss.batch_loss(outputs, targets) < 1e-3
def test_xor(use_GPU):
    inputs = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
    targets = np.asarray([[0], [1], [1], [0]], dtype=np.float32)

    ff = hf.FFNet([2, 5, 1], debug=True, use_GPU=use_GPU)

    ff.run_batches(inputs, targets, optimizer=hf.opt.HessianFree(CG_iter=2),
                   max_epochs=40, print_period=None)

    outputs = ff.forward(inputs, ff.W)

    assert ff.loss.batch_loss(outputs, targets) < 1e-5
def test_softlif(use_GPU):
    inputs = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
    targets = np.asarray([[0.1], [1], [1], [0.1]], dtype=np.float32)

    lifs = hf.nl.SoftLIF(sigma=1, tau_ref=0.002, tau_rc=0.02, amp=0.01)
    ff = hf.FFNet([2, 10, 1], layers=lifs, debug=True, use_GPU=use_GPU)

    ff.run_batches(inputs, targets, optimizer=hf.opt.HessianFree(CG_iter=50),
                   max_epochs=50, print_period=None)

    outputs = ff.forward(inputs, ff.W)

    assert ff.loss.batch_loss(outputs, targets) < 1e-5
def test_asym_dact(use_GPU):
    # a nonlinearity whose d_activation is a full (non-diagonal) Jacobian
    class Roll(hf.nl.Nonlinearity):
        def activation(self, x):
            return np.roll(x, 1, axis=-1)

        def d_activation(self, x, _):
            d_act = np.roll(np.eye(x.shape[-1], dtype=x.dtype), 1, axis=0)
            return np.resize(d_act, np.concatenate((x.shape[:-1],
                                                    d_act.shape)))

    inputs = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
    targets = np.asarray([[0], [1], [1], [0]], dtype=np.float32)

    ff = hf.FFNet([2, 5, 1], layers=Roll(), debug=True, use_GPU=use_GPU)

    ff.run_batches(inputs, targets, optimizer=hf.opt.HessianFree(CG_iter=2),
                   max_epochs=40, print_period=None)
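# By contrast, a custom nonlinearity with the usual diagonal Jacobian can
# just return the elementwise derivative, as the built-in nonlinearities do
# (a hypothetical sketch following the Nonlinearity interface used by Roll
# above; the elementwise return convention is an assumption):
class Cube(hf.nl.Nonlinearity):
    def activation(self, x):
        return x ** 3

    def d_activation(self, x, a):
        # derivative of x**3 with respect to x, same shape as x
        return 3 * x ** 2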
def profile_calc_G(cprofile=True):
    """Run a profiler on the feedforward curvature calculation.

    :param bool cprofile: use True if profiling on the CPU, False if using
        the CUDA profiler
    """

    inputs = np.random.randn(1024, 1).astype(np.float32)
    targets = np.random.randn(1024, 1).astype(np.float32)
    N = 1024

    ff = hf.FFNet([1, N, N, 1], use_GPU=True)
    ff.cache_minibatch(inputs, targets)
    v = np.random.randn(ff.W.size).astype(np.float32)

    for _ in range(5):
        # run it a few times to get rid of any startup overhead
        ff.GPU_calc_G(v)

    if cprofile:
        start = time.time()
        p = Profile()
        p.enable()
    else:
        pycuda.driver.start_profiler()

    for _ in range(500):
        _ = ff.GPU_calc_G(v)

    if cprofile:
        p.disable()
        print("time", time.time() - start)
        ps = pstats.Stats(p)
        ps.strip_dirs().sort_stats('time').print_stats(20)
    else:
        pycuda.driver.stop_profiler()
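# Usage sketch: with cprofile=True the pstats summary prints directly; with
# cprofile=False, run the script under NVIDIA's profiler so the
# start_profiler/stop_profiler markers take effect (module name below is
# hypothetical):
#   nvprof python -c "import demos; demos.profile_calc_G(cprofile=False)"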
def test_sparsity(use_GPU):
    inputs = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
    targets = np.asarray([[0], [1], [1], [0]], dtype=np.float32)

    ff = hf.FFNet([2, 8, 1], debug=True, use_GPU=use_GPU,
                  loss_type=[hf.loss_funcs.SquaredError(),
                             hf.loss_funcs.SparseL2(0.01, target=0)])

    ff.run_batches(inputs, targets, optimizer=hf.opt.HessianFree(CG_iter=50),
                   max_epochs=100, print_period=None)

    outputs = ff.forward(inputs, ff.W)

    assert ff.loss.batch_loss(outputs, targets) < 1e-2
    assert np.mean(outputs[1]) < 0.1
def test_connections(use_GPU):
    inputs = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
    targets = np.asarray([[0], [1], [1], [0]], dtype=np.float32)

    ff = hf.FFNet([2, 5, 5, 1], layers=hf.nl.Tanh(), debug=True,
                  conns={0: [1, 2], 1: [3], 2: [3]}, use_GPU=use_GPU)

    ff.run_batches(inputs, targets, optimizer=hf.opt.HessianFree(CG_iter=50),
                   max_epochs=50, print_period=None)

    outputs = ff.forward(inputs, ff.W)

    assert ff.loss.batch_loss(outputs, targets) < 1e-5
def test_crossentropy(use_GPU):
    inputs = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
    targets = np.asarray([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0],
                          [0, 0, 0, 1]], dtype=np.float32)

    ff = hf.FFNet([2, 5, 4],
                  layers=[hf.nl.Linear(), hf.nl.Tanh(), hf.nl.Softmax()],
                  debug=True, loss_type=hf.loss_funcs.CrossEntropy(),
                  use_GPU=use_GPU)

    ff.run_batches(inputs, targets, optimizer=hf.opt.HessianFree(CG_iter=50),
                   max_epochs=100, print_period=None)

    outputs = ff.forward(inputs, ff.W)

    assert ff.loss.batch_loss(outputs, targets) < 1e-5
def crossentropy():
    """A network that modifies the layer types and loss function."""

    inputs = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
    targets = np.asarray([[1, 0], [0, 1], [0, 1], [1, 0]], dtype=np.float32)

    ff = hf.FFNet([2, 5, 2],
                  layers=[hf.nl.Linear(), hf.nl.Tanh(), hf.nl.Softmax()],
                  loss_type=hf.loss_funcs.CrossEntropy())

    ff.run_epochs(inputs, targets, optimizer=hf.opt.HessianFree(CG_iter=2),
                  max_epochs=40, plotting=True)

    outputs = ff.forward(inputs)[-1]
    for i in range(4):
        print("-" * 20)
        print("input", inputs[i])
        print("target", targets[i])
        print("output", outputs[i])
def xor():
    """Run a basic xor training test."""

    inputs = np.asarray([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
    targets = np.asarray([[0], [1], [1], [0]], dtype=np.float32)

    ff = hf.FFNet([2, 5, 1])

    ff.run_batches(inputs, targets, optimizer=hf.opt.HessianFree(CG_iter=2),
                   max_epochs=40, plotting=True)

    # using gradient descent (for comparison)
    # ff.run_batches(inputs, targets, optimizer=hf.opt.SGD(l_rate=1),
    #                max_epochs=10000, plotting=True)

    outputs = ff.forward(inputs, ff.W)[-1]
    for i in range(4):
        print("-" * 20)
        print("input", inputs[i])
        print("target", targets[i])
        print("output", outputs[i])
# helper to display the shapes of a list of weight arrays
pshape = lambda a_list: [w.shape for w in a_list]

# define hyperparameters
n_nodes = [42, 24, 12, 1]  # number of units per layer
# all ReLU except linear for the output layer
layers = [hf.nl.ReLU()] * (len(n_nodes) - 1) + [hf.nl.Linear()]
batch_size = 1024

# initialize a Hessian-free model (GPU use is optional); X/ret and
# X_val/ret_val are the training and validation data, defined elsewhere
ff = hf.FFNet(n_nodes, layers=layers,
              loss_type=hf.loss_funcs.SquaredError(),
              W_init_params={"coeff": 1.0, "biases": 1.0,
                             "init_type": 'gaussian'},
              use_GPU=False)

ff.run_epochs(X, ret, test=(X_val, ret_val), minibatch_size=batch_size,
              optimizer=hf.opt.HessianFree(CG_iter=2), max_epochs=50,
              plotting=True, print_period=None)

print('After fitting on the training set for 50 epochs, the trained '
      'weights are stored in ff.W')
def mnist(model_args=None, run_args=None):
    """Test on the MNIST (digit classification) dataset.

    Download dataset at http://deeplearning.net/data/mnist/mnist.pkl.gz

    :param dict model_args: kwargs that will be passed to the
        :class:`.FFNet` constructor
    :param dict run_args: kwargs that will be passed to :meth:`.run_epochs`
    """

    with open("mnist.pkl", "rb") as f:
        try:
            train, _, test = pickle.load(f)
        except UnicodeDecodeError:
            # python 3
            with open("mnist.pkl", "rb") as f2:
                train, _, test = pickle.load(f2, encoding="bytes")

    if model_args is None:
        ff = hf.FFNet([28 * 28, 1024, 512, 256, 32, 10],
                      layers=([hf.nl.Linear()] + [hf.nl.ReLU()] * 4 +
                              [hf.nl.Softmax()]),
                      use_GPU=True, debug=False)
    else:
        ff = hf.FFNet([28 * 28, 1024, 512, 256, 32, 10],
                      layers=([hf.nl.Linear()] + [hf.nl.ReLU()] * 4 +
                              [hf.nl.Softmax()]),
                      **model_args)

    inputs = train[0]
    targets = np.zeros((inputs.shape[0], 10), dtype=np.float32)
    targets[np.arange(inputs.shape[0]), train[1]] = 0.9
    targets += 0.01

    tmp = np.zeros((test[0].shape[0], 10), dtype=np.float32)
    tmp[np.arange(test[0].shape[0]), test[1]] = 0.9
    tmp += 0.01
    test = (test[0], tmp)

    if run_args is None:
        ff.run_epochs(inputs, targets,
                      optimizer=hf.opt.HessianFree(CG_iter=250,
                                                   init_damping=45),
                      minibatch_size=7500, test=test, max_epochs=125,
                      test_err=hf.loss_funcs.ClassificationError(),
                      plotting=True)
    else:
        CG_iter = run_args.pop("CG_iter", 250)
        init_damping = run_args.pop("init_damping", 45)
        ff.run_epochs(inputs, targets,
                      optimizer=hf.opt.HessianFree(CG_iter, init_damping),
                      test=test,
                      test_err=hf.loss_funcs.ClassificationError(),
                      **run_args)

    output = ff.forward(test[0])
    print("classification error",
          hf.loss_funcs.ClassificationError().batch_loss(output, test[1]))
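# Usage sketch for the mnist demo above (argument values are illustrative;
# run_args entries other than CG_iter/init_damping pass through to
# run_epochs):
# mnist(model_args={"use_GPU": False, "debug": False},
#       run_args={"CG_iter": 100, "init_damping": 45,
#                 "minibatch_size": 1000, "max_epochs": 10,
#                 "plotting": False})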