Example #1
def threshold_rnn_calc_G():
    """Compare GPU vs CPU performance on recurrent curvature calculation.

    This can be used to determine whether it is better to run a given
    target network on the CPU or the GPU."""

    batch_size = 1024
    layer_size = [1] + list(range(32, 129, 32))
    sig_len = [1] + list(range(8, 33, 8))
    reps = 100

    times = np.zeros((len(sig_len), len(layer_size), 2))
    for i, b in enumerate(sig_len):
        inputs = np.random.randn(batch_size, b, 1).astype(np.float32)
        targets = np.random.randn(batch_size, b, 1).astype(np.float32)

        for j, n in enumerate(layer_size):
            rnn = hf.RNNet([1, n, 1], use_GPU=False)
            rnn.cache_minibatch(inputs, targets)

            v = np.random.randn(rnn.W.size).astype(np.float32)

            for _ in range(5):
                rnn.calc_G(v)

            start = time.time()
            for _ in range(reps):
                rnn.calc_G(v)
            times[i, j, 0] = time.time() - start

            rnn = hf.RNNet([1, n, 1], use_GPU=True)
            rnn.cache_minibatch(inputs, targets)

            v = gpuarray.to_gpu(v)

            for _ in range(5):
                rnn.GPU_calc_G(v)

            start = time.time()
            for _ in range(reps):
                rnn.GPU_calc_G(v)

            v = v.get()
            times[i, j, 1] = time.time() - start

            print "b", b, "n", n, "times", times[i, j]

    print times[..., 1] - times[..., 0]

    print "signal length (%s) versus layer size (%s)" % (sig_len, layer_size)
    print " (True indicates GPU is faster)"
    print times[..., 1] < times[..., 0]
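(A note on these listings: the snippets are shown without their imports. A
minimal header that makes them self-contained, reconstructed only from names
the examples themselves use (treat the exact module locations as
assumptions), would look something like the following.)

import time
import pstats
from cProfile import Profile

import numpy as np
import matplotlib.pyplot as plt
import pycuda.driver
from pycuda import gpuarray

import hessianfree as hf

# several examples use these names unqualified
HessianFree = hf.opt.HessianFree
Linear, Logistic, Tanh = hf.nl.Linear, hf.nl.Logistic, hf.nl.Tanh
Continuous = hf.nl.Continuous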
Example #2
def test_asym_dact(use_GPU):
    class Roll(hf.nl.Nonlinearity):
        def activation(self, x):
            return np.roll(x, 1, axis=-1)

        def d_activation(self, x, _):
            d_act = np.roll(np.eye(x.shape[-1], dtype=x.dtype), 1, axis=0)
            return np.resize(d_act, np.concatenate(
                (x.shape[:-1], d_act.shape)))

    n_inputs = 3
    sig_len = 5

    inputs = np.outer(np.linspace(0.1, 0.9, n_inputs),
                      np.ones(sig_len))[:, :, None]
    targets = np.outer(np.linspace(0.1, 0.9, n_inputs),
                       np.linspace(0, 1, sig_len))[:, :, None]
    inputs = inputs.astype(np.float32)
    targets = targets.astype(np.float32)

    rnn = hf.RNNet(shape=[1, 5, 1], layers=Roll(), debug=True, use_GPU=use_GPU)

    rnn.run_batches(inputs,
                    targets,
                    optimizer=HessianFree(CG_iter=100),
                    max_epochs=30,
                    print_period=None)
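(Aside: Roll.d_activation above builds the Jacobian of a cyclic shift by
rolling the identity matrix. A small standalone check of that construction,
my own sketch rather than part of the test suite:)

import numpy as np

x = np.arange(4.0)
J = np.roll(np.eye(4), 1, axis=0)  # same construction as d_activation
assert np.allclose(J.dot(x), np.roll(x, 1))  # J reproduces the shift exactly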
Example #3
def test_continuous(use_GPU):
    n_inputs = 3
    sig_len = 5
    nl = Continuous(Logistic(), tau=np.random.uniform(1, 3, size=5), dt=0.9)
    inputs = np.outer(np.linspace(0.1, 0.9, n_inputs),
                      np.ones(sig_len))[:, :, None]
    targets = np.outer(np.linspace(0.1, 0.9, n_inputs),
                       np.linspace(0, 1, sig_len))[:, :, None]
    inputs = inputs.astype(np.float32)
    targets = targets.astype(np.float32)

    rnn = hf.RNNet(shape=[1, 5, 1],
                   layers=[Linear(), nl, Logistic()],
                   debug=True,
                   use_GPU=use_GPU)

    rnn.run_batches(inputs,
                    targets,
                    optimizer=HessianFree(CG_iter=100),
                    max_epochs=30,
                    print_period=None)

    outputs = rnn.forward(inputs, rnn.W)

    assert rnn.loss.batch_loss(outputs, targets) < 1e-4
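(Aside: Continuous(Logistic(), tau=..., dt=0.9) presumably wraps the
nonlinearity in leaky-integrator dynamics, with tau the time constant and dt
the step size. A hypothetical sketch of that reading; the snippet itself does
not show the library's implementation:)

import numpy as np

def continuous_step(state, x, f, tau, dt):
    # relax the state toward f(x) at a rate set by dt / tau
    return state + (dt / tau) * (f(x) - state)

state = np.zeros(5)
for _ in range(100):
    state = continuous_step(state, np.ones(5), np.tanh, tau=2.0, dt=0.9)
assert np.allclose(state, np.tanh(1.0), atol=1e-3)  # settles at f(x)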
Example #4
def test_strucdamping(use_GPU):
    n_inputs = 3
    sig_len = 5

    inputs = np.outer(np.linspace(0.1, 0.9, n_inputs),
                      np.ones(sig_len))[:, :, None]
    targets = np.outer(np.linspace(0.1, 0.9, n_inputs),
                       np.linspace(0, 1, sig_len))[:, :, None]
    inputs = inputs.astype(np.float32)
    targets = targets.astype(np.float32)

    optimizer = HessianFree(CG_iter=100)

    rnn = hf.RNNet(shape=[1, 5, 1],
                   loss_type=[
                       hf.loss_funcs.SquaredError(),
                       hf.loss_funcs.StructuralDamping(0.1,
                                                       optimizer=optimizer)
                   ],
                   debug=True,
                   use_GPU=use_GPU)

    rnn.run_batches(inputs,
                    targets,
                    optimizer=optimizer,
                    max_epochs=30,
                    print_period=None)

    outputs = rnn.forward(inputs, rnn.W)

    assert rnn.loss.batch_loss(outputs, targets) < 1e-4
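(For reference: structural damping, from Martens and Sutskever (2011), adds a
penalty on how far the hidden activations move when the weights are
perturbed, roughly

    L_damped(delta) = L(delta) + mu * lambda * D(h(theta + delta), h(theta))

where mu is the coefficient (0.1 above), lambda is the optimizer's current
damping parameter, and D is a distance on hidden states. Passing optimizer=
presumably lets the loss term read lambda as it adapts; whether this library
implements exactly that form is not shown in the snippet.)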
Example #5
def test_rnn_CG(use_GPU):
    rng = np.random.RandomState(0)
    inputs = rng.randn(100, 10, 2).astype(np.float32)
    targets = rng.randn(100, 10, 1).astype(np.float32)
    rnn = hf.RNNet([2, 5, 1], debug=False, use_GPU=use_GPU, rng=rng)
    rnn.optimizer = hf.opt.HessianFree()
    rnn.cache_minibatch(inputs, targets)

    deltas = rnn.optimizer.conjugate_gradient(np.zeros(rnn.W.size,
                                                       dtype=np.float32),
                                              rnn.calc_grad(),
                                              iters=20,
                                              printing=False)

    assert deltas[1][0] == 6
    assert np.allclose(deltas[1][1], [
        2.88910931e-03, -1.08404364e-02, 6.17342826e-04, -1.85968506e-03,
        1.71574634e-02, 3.08436429e-04, -5.35693355e-02, -2.39962409e-03,
        5.33994753e-03, 3.52956937e-03, 1.83414537e-02, -1.20746918e-01,
        4.14435379e-03, 5.21760620e-03, 7.41007701e-02, -2.86964715e-01,
        -2.21885830e-01, -3.84823292e-01, -2.63742000e-01, -9.64779630e-02,
        -4.55241114e-01, 9.68043320e-03, -5.81301711e-02, 1.87756377e-03,
        3.52657953e-05, 3.19301970e-02, 7.79627683e-03, -4.76030372e-02,
        1.58238632e-03, 1.87149423e-03, 2.43508108e-02, 1.32407937e-02,
        -8.43726397e-02, 2.58994917e-03, 2.43114564e-03, 4.95423339e-02,
        1.13963615e-02, -7.54035711e-02, 2.11156602e-03, 4.81781084e-03,
        4.49908487e-02, 4.63910261e-03, -3.11208423e-02, 1.24892767e-03,
        2.63486174e-03, 1.77674163e-02, 1.60023139e-03, -1.40727460e-02,
        7.28542393e-04, 6.10395044e-04, 1.20819537e-02
    ],
                       atol=1e-5)
Example #6
def test_rnn_calc_G(dtype):
    inputs = np.random.randn(1000, 10, 1).astype(dtype)
    rnn = hf.RNNet([1, 10, 1], debug=(dtype == np.float64), use_GPU=True)
    rnn.cache_minibatch(inputs, inputs)
    rnn.optimizer = hf.opt.HessianFree()

    v = np.random.randn(rnn.W.size).astype(dtype)
    gpu_Gv = rnn.GPU_calc_G(v)
    cpu_Gv = rnn.calc_G(v)

    assert np.allclose(gpu_Gv, cpu_Gv, rtol=1e-4)
Example #7
def test_truncation(use_GPU):
    n_inputs = 2
    sig_len = 6

    inputs = np.ones((n_inputs, sig_len, 1), dtype=np.float32) * 0.5
    targets = np.ones((n_inputs, sig_len, 1), dtype=np.float32) * 0.5

    rnn = hf.RNNet(shape=[1, 8, 1],
                   debug=True,
                   use_GPU=use_GPU,
                   truncation=(3, 3))

    rnn.run_epochs(inputs,
                   targets,
                   optimizer=HessianFree(CG_iter=100),
                   max_epochs=10,
                   print_period=None)
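(Aside: truncation=(3, 3) presumably splits backpropagation through time into
shorter windows. A made-up helper illustrating one natural reading of the two
numbers, window start interval and window length; this is not the library's
code:)

def truncated_spans(sig_len, every, length):
    # start a new backprop window every `every` steps, each spanning
    # at most `length` steps forward
    return [(s, min(s + length, sig_len))
            for s in range(0, sig_len, every)]

print(truncated_spans(6, 3, 3))  # [(0, 3), (3, 6)]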
Example #8
def profile_rnn_calc_G(cprofile=True):
    """Run a profiler on the recurrent curvature calculation.

    :param bool cprofile: use True if profiling on the CPU, False if using the
        CUDA profiler
    """

    inputs = np.random.randn(1024, 128, 1).astype(np.float32)
    targets = np.random.randn(1024, 128, 1).astype(np.float32)
    N = 128

    rnn = hf.RNNet([1, N, 1], use_GPU=True)
    rnn.optimizer = hf.opt.HessianFree()  # for struc_damping check
    rnn.cache_minibatch(inputs, targets)

    v = np.random.randn(rnn.W.size).astype(np.float32)

    for _ in range(2):
        # run it a few times to get rid of any startup overhead
        rnn.GPU_calc_G(v)

    if cprofile:
        start = time.time()

        p = Profile()
        p.enable()
    else:
        pycuda.driver.start_profiler()

    for _ in range(100):
        _ = rnn.GPU_calc_G(v)

    if cprofile:
        p.disable()

        print "time", time.time() - start

        ps = pstats.Stats(p)
        ps.strip_dirs().sort_stats('time').print_stats(20)
    else:
        pycuda.driver.stop_profiler()
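(Usage note: with cprofile=False the loop is bracketed by
pycuda.driver.start_profiler()/stop_profiler(), which only delimit a capture
region when the process runs under NVIDIA's profiler, e.g. via a hypothetical
launch such as the following, assuming the function lives in a module named
profiling.)

    nvprof --profile-from-start off python -c \
        "import profiling; profiling.profile_rnn_calc_G(cprofile=False)"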
Example #9
def test_truncation(use_GPU):
    n_inputs = 2
    sig_len = 6

    inputs = np.ones((n_inputs, sig_len, 1), dtype=np.float32) * 0.5
    targets = np.ones((n_inputs, sig_len, 1), dtype=np.float32) * 0.5

    rnn = hf.RNNet(shape=[1, 5, 1],
                   debug=True,
                   use_GPU=use_GPU,
                   truncation=(3, 3))

    rnn.run_batches(inputs,
                    targets,
                    optimizer=HessianFree(CG_iter=100),
                    max_epochs=30,
                    print_period=None)

    outputs = rnn.forward(inputs, rnn.W)

    assert rnn.loss.batch_loss(outputs, targets) < 1e-4
Example #10
def test_integrator(use_GPU):
    n_inputs = 3
    sig_len = 5

    inputs = np.outer(np.linspace(0.1, 0.9, n_inputs),
                      np.ones(sig_len))[:, :, None]
    targets = np.outer(np.linspace(0.1, 0.9, n_inputs),
                       np.linspace(0, 1, sig_len))[:, :, None]
    inputs = inputs.astype(np.float32)
    targets = targets.astype(np.float32)

    rnn = hf.RNNet(shape=[1, 5, 1], debug=True, use_GPU=use_GPU)

    rnn.run_batches(inputs,
                    targets,
                    optimizer=HessianFree(CG_iter=100),
                    max_epochs=30,
                    print_period=None)

    outputs = rnn.forward(inputs, rnn.W)

    assert rnn.loss.batch_loss(outputs, targets) < 1e-4
Example #11
def plant(plots=True):
    """Example of a network using a dynamic plant as the output layer."""

    n_inputs = 32
    sig_len = 15

    class Plant(hf.nl.Plant):
        # this plant implements a simple dynamic system, with two-dimensional
        # state representing [position, velocity]
        def __init__(self, A, B, targets, init_state):
            super(Plant, self).__init__()

            self.A = np.asarray(A)
            self.B = B

            self.targets = targets
            self.init_state = init_state

            self.shape = [n_inputs, sig_len, len(A)]

            # derivative of output with respect to state (constant, so just
            # compute it once here)
            self.d_output = np.resize(np.eye(
                self.shape[-1]), (n_inputs, self.shape[-1], self.shape[-1], 1))

            self.reset()

        def activation(self, x):
            self.act_count += 1

            # this implements a basic s_{t+1} = A*s_t + B*x dynamic system.
            # but to make things a little more complicated we allow the B
            # matrix to be dynamic, so it's actually
            # s_{t+1} = A*s_t + B(s_t)*x

            self.B_matrix, self.d_B_matrix = self.B(self.state)

            self.state = (np.dot(self.state, self.A) +
                          np.einsum("ij,ijk->ik", x, self.B_matrix))

            return self.state[:x.shape[0]]

        def d_activation(self, x, _):
            self.d_act_count += 1
            assert self.act_count == self.d_act_count

            # derivative of state with respect to input
            d_input = self.B_matrix.transpose((0, 2, 1))[..., None]

            # derivative of state with respect to previous state
            d_state = np.resize(self.A.T,
                                np.concatenate(([x.shape[0]], self.A.shape)))
            d_state[:, 1, 0] += x[:, 1] * self.d_B_matrix[:, 1, 1]
            d_state = d_state[..., None]

            return np.concatenate((d_input, d_state, self.d_output), axis=-1)

        def __call__(self, _):
            self.inputs = np.concatenate((self.inputs, self.state[:, None, :]),
                                         axis=1)
            return self.state

        def get_inputs(self):
            return self.inputs

        def get_targets(self):
            return self.targets

        def reset(self, init=None):
            self.act_count = 0
            self.d_act_count = 0
            self.state = (self.init_state.copy()
                          if init is None else init.copy())
            self.inputs = np.zeros((self.shape[0], 0, self.shape[-1]),
                                   dtype=np.float32)
            self.B_matrix = self.d_B_matrix = None

    # static A matrix (converts velocity into a change in position)
    A = [[1, 0], [0.2, 1]]

    # dynamic B(s) matrix (converts input into velocity, modulated by current
    # state)
    # note that this dynamic B matrix doesn't really make much sense, it's
    # just here to demonstrate what happens with a plant whose dynamics
    # change over time
    def B(state):
        B = np.zeros((state.shape[0], state.shape[1], state.shape[1]))
        B[:, 1, 1] = np.tanh(state[:, 0])

        d_B = np.zeros((state.shape[0], state.shape[1], state.shape[1]))
        d_B[:, 1, 1] = 1 - np.tanh(state[:, 0])**2

        return B, d_B

    # random initial position and velocity
    init_state = np.random.uniform(-0.5, 0.5, size=(n_inputs, 2))

    # the target will be to end at position 1 with velocity 0
    targets = np.ones((n_inputs, sig_len, 2), dtype=np.float32)
    targets[:, :, 1] = 0
    targets[:, :-1, :] = np.nan

    plant = Plant(A, B, targets, init_state)

    rnn = hf.RNNet(shape=[2, 16, 2],
                   layers=[hf.nl.Linear(), hf.nl.Tanh(), plant],
                   W_init_params={"coeff": 0.1},
                   W_rec_params={"coeff": 0.1},
                   rng=np.random.RandomState(0))

    rnn.run_batches(plant,
                    None,
                    hf.opt.HessianFree(CG_iter=20, init_damping=10),
                    max_epochs=150,
                    plotting=plots)

    # using gradient descent (for comparison)
    #     rnn.run_batches(plant, None, optimizer=SGD(l_rate=0.01),
    #                     batch_size=None, test=test, max_epochs=10000,
    #                     plotting=True)

    if plots:
        outputs = rnn.forward(plant, rnn.W)[-1]

        plt.figure()
        plt.plot(outputs[:, :, 0].squeeze().T)
        plt.title("position")

        plt.figure()
        plt.plot(outputs[:, :, 1].squeeze().T)
        plt.title("velocity")

        plt.show()
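(Aside: the hand-written derivative in B can be sanity-checked numerically. A
sketch of my own, reusing the B defined in this example: perturb the position
component of the state and compare a finite difference against d_B.)

import numpy as np

state = np.random.uniform(-0.5, 0.5, size=(4, 2))
eps = 1e-6
B0, d_B = B(state)
shifted = state.copy()
shifted[:, 0] += eps  # perturb the position component only
B1, _ = B(shifted)
assert np.allclose((B1 - B0)[:, 1, 1] / eps, d_B[:, 1, 1], atol=1e-4)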
Example #12
def adding(T=50, plots=True):
    """The canonical "adding" test of long-range dependency learning for RNNs.
    """

    # set up inputs
    N = 100000
    test_cut = int(N * 0.9)

    vals = np.random.uniform(0, 1, size=(N, T, 1)).astype(np.float32)
    mask = np.zeros((N, T, 1), dtype=np.float32)
    for m in mask:
        m[np.random.randint(T // 10)] = 1
        m[np.random.randint(T // 10, T // 2)] = 1
    inputs = np.concatenate((vals, mask), axis=-1)

    tmp = np.zeros_like(vals)
    tmp[mask.astype(bool)] = vals[mask.astype(bool)]

    targets = np.zeros((N, T, 1), dtype=np.float32)
    targets[:] = np.nan
    targets[:, -1] = np.sum(tmp, axis=1, dtype=np.float32)

    test = (inputs[test_cut:], targets[test_cut:])

    # build network
    optimizer = hf.opt.HessianFree(CG_iter=60, init_damping=20)
    W_init_params = {"coeff": 0.25}
    rnn = hf.RNNet(shape=[2, 32, 64, 1],
                   layers=[
                       hf.nl.Linear(),
                       hf.nl.ReLU(),
                       hf.nl.Continuous(hf.nl.ReLU(), tau=20),
                       hf.nl.ReLU()
                   ],
                   W_init_params=W_init_params,
                   loss_type=[
                       hf.loss_funcs.SquaredError(),
                       hf.loss_funcs.StructuralDamping(1e-4,
                                                       layers=[2],
                                                       optimizer=optimizer)
                   ],
                   rec_layers=[2],
                   use_GPU=True,
                   debug=False,
                   rng=np.random.RandomState(0))

    # scale spectral radius of recurrent weights
    W, _ = rnn.get_weights(rnn.W, (2, 2))
    W *= 1.0 / np.max(np.abs(np.linalg.eigvals(W)))

    rnn.run_batches(inputs[:test_cut],
                    targets[:test_cut],
                    optimizer=optimizer,
                    batch_size=1024,
                    test=test,
                    max_epochs=50,
                    plotting=plots,
                    test_err=hf.loss_funcs.SquaredError())

    if plots:
        outputs = rnn.forward(inputs[:20], rnn.W)
        plt.figure()
        lines = plt.plot(outputs[-1][:].squeeze().T)
        plt.scatter(np.ones(outputs[-1].shape[0]) * outputs[-1].shape[1],
                    targets[:20, -1],
                    c=[plt.getp(l, "color") for l in lines])
        plt.title("outputs")

        plt.show()
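(Aside: the two lines that rescale the recurrent weights above are a standard
recurrent network initialization trick: dividing the recurrent weight matrix
by its spectral radius pins its largest eigenvalue magnitude at 1, so signals
neither explode nor vanish over the 50-step sequences. A standalone
illustration, my own sketch:)

import numpy as np

rng = np.random.RandomState(0)
W = rng.randn(64, 64)
W *= 1.0 / np.max(np.abs(np.linalg.eigvals(W)))  # same scaling as above
assert np.isclose(np.max(np.abs(np.linalg.eigvals(W))), 1.0)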
Example #13
def integrator(model_args=None,
               run_args=None,
               n_inputs=15,
               sig_len=10,
               plots=True):
    """Example of a recurrent network, implementing an integrator."""

    inputs = np.outer(np.linspace(0.1, 0.9, n_inputs),
                      np.ones(sig_len))[:, :, None]
    targets = np.outer(np.linspace(0.1, 0.9, n_inputs),
                       np.linspace(0, 1, sig_len))[:, :, None]
    inputs = inputs.astype(np.float32)
    targets = targets.astype(np.float32)

    test = (inputs, targets)

    if model_args is None:
        rnn = hf.RNNet(shape=[1, 10, 1],
                       layers=hf.nl.Logistic(),
                       debug=False,
                       use_GPU=False)
    else:
        rnn = hf.RNNet(**model_args)

    if run_args is None:
        rnn.run_batches(inputs,
                        targets,
                        optimizer=hf.opt.HessianFree(CG_iter=100),
                        test=test,
                        max_epochs=30,
                        plotting=plots)
    else:
        CG_iter = run_args.pop("CG_iter", 100)
        init_damping = run_args.pop("init_damping", 1)
        rnn.run_batches(inputs,
                        targets,
                        optimizer=hf.opt.HessianFree(CG_iter, init_damping),
                        test=test,
                        plotting=plots,
                        **run_args)

    # using gradient descent (for comparison)
    # rnn.run_batches(inputs, targets, optimizer=SGD(l_rate=0.1),
    #                 batch_size=None, test=test, max_epochs=10000,
    #                 plotting=True)

    if plots:
        plt.figure()
        plt.plot(inputs.squeeze().T)
        plt.title("inputs")

        plt.figure()
        plt.plot(targets.squeeze().T)
        plt.title("targets")

        outputs = rnn.forward(inputs, rnn.W)[-1]
        plt.figure()
        plt.plot(outputs.squeeze().T)
        plt.title("outputs")

        plt.show()
Example #14
def integrator(model_args=None,
               run_args=None,
               n_inputs=15,
               sig_len=10,
               plots=True):
    """A recurrent network implementing an integrator.

    :param dict model_args: kwargs that will be passed to the :class:`.RNNet`
        constructor
    :param dict run_args: kwargs that will be passed to :meth:`.run_epochs`
    :param int n_inputs: size of batch to train on
    :param int sig_len: number of timesteps to run for
    :param bool plots: display plots of trained output
    """

    inputs = np.outer(np.linspace(0.1, 0.9, n_inputs),
                      np.ones(sig_len))[:, :, None]
    targets = np.outer(np.linspace(0.1, 0.9, n_inputs),
                       np.linspace(0, 1, sig_len))[:, :, None]
    inputs = inputs.astype(np.float32)
    targets = targets.astype(np.float32)

    test = (inputs, targets)

    if model_args is None:
        rnn = hf.RNNet(shape=[1, 10, 1],
                       layers=hf.nl.Logistic(),
                       debug=False,
                       use_GPU=False)
    else:
        rnn = hf.RNNet(**model_args)

    if run_args is None:
        rnn.run_epochs(inputs,
                       targets,
                       optimizer=hf.opt.HessianFree(CG_iter=100),
                       test=test,
                       max_epochs=30,
                       plotting=plots)
    else:
        CG_iter = run_args.pop("CG_iter", 100)
        init_damping = run_args.pop("init_damping", 1)
        rnn.run_epochs(inputs,
                       targets,
                       optimizer=hf.opt.HessianFree(CG_iter, init_damping),
                       test=test,
                       plotting=plots,
                       **run_args)

    if plots:
        plt.figure()
        plt.plot(inputs.squeeze().T)
        plt.title("inputs")

        plt.figure()
        plt.plot(targets.squeeze().T)
        plt.title("targets")

        outputs = rnn.forward(inputs)[-1]
        plt.figure()
        plt.plot(outputs.squeeze().T)
        plt.title("outputs")

        plt.show()
Example #15
def test_plant(use_GPU):
    n_inputs = 32
    sig_len = 15

    class Plant(hf.nl.Plant):
        # this plant implements a simple dynamic system, with two-dimensional
        # state representing [position, velocity]
        def __init__(self, A, B, targets, init_state):
            super(Plant, self).__init__(stateful=True)

            self.A = np.asarray(A)
            self.B = B

            self.targets = targets
            self.init_state = init_state

            self.shape = [n_inputs, sig_len, len(A)]

            # derivative of output with respect to state (constant, so just
            # compute it once here)
            self.d_output = np.resize(np.eye(
                self.shape[-1]), (n_inputs, self.shape[-1], self.shape[-1], 1))

            self.reset()

        def activation(self, x):
            self.act_count += 1

            # this implements a basic s_{t+1} = A*s_t + B*x dynamic system.
            # but to make things a little more complicated we allow the B
            # matrix to be dynamic, so it's actually
            # s_{t+1} = A*s_t + B(s_t)*x

            self.B_matrix, self.d_B_matrix = self.B(self.state)

            self.state = (np.dot(self.state, self.A) +
                          np.einsum("ij,ijk->ik", x, self.B_matrix))

            return self.state[:x.shape[0]]

        def d_activation(self, x, _):
            self.d_act_count += 1
            assert self.act_count == self.d_act_count

            # derivative of state with respect to input
            d_input = self.B_matrix.transpose((0, 2, 1))[..., None]

            # derivative of state with respect to previous state
            d_state = np.resize(self.A.T,
                                np.concatenate(([x.shape[0]], self.A.shape)))
            d_state[:, 1, 0] += x[:, 1] * self.d_B_matrix[:, 1, 1]
            d_state = d_state[..., None]

            return np.concatenate((d_input, d_state, self.d_output), axis=-1)

        def __call__(self, _):
            self.inputs = np.concatenate((self.inputs, self.state[:, None, :]),
                                         axis=1)
            return self.state

        def get_inputs(self):
            return self.inputs

        def get_targets(self):
            return self.targets

        def reset(self, init=None):
            self.act_count = 0
            self.d_act_count = 0
            self.state = (self.init_state.copy()
                          if init is None else init.copy())
            self.inputs = np.zeros((self.shape[0], 0, self.shape[-1]),
                                   dtype=np.float32)
            self.B_matrix = self.d_B_matrix = None

    # static A matrix (converts velocity into a change in position)
    A = [[1, 0], [0.2, 1]]

    # dynamic B(s) matrix (converts input into velocity, modulated by current
    # state)
    # note that this dynamic B matrix doesn't really make much sense, it's
    # just here to demonstrate what happens with a plant whose dynamics
    # change over time
    def B(state):
        B = np.zeros((state.shape[0], state.shape[1], state.shape[1]))
        B[:, 1, 1] = np.tanh(state[:, 0])

        d_B = np.zeros((state.shape[0], state.shape[1], state.shape[1]))
        d_B[:, 1, 1] = 1 - np.tanh(state[:, 0])**2

        return B, d_B

    # initial position
    init_state = np.zeros((n_inputs, 2))
    init_state[:, 0] = np.linspace(-1, 1, n_inputs)

    # the target will be to end at position 1 with velocity 0
    targets = np.ones((n_inputs, sig_len, 2), dtype=np.float32)
    targets[:, :, 1] = 0
    targets[:, :-1, :] = np.nan

    plant = Plant(A, B, targets, init_state)

    rnn = hf.RNNet(shape=[2, 16, 2],
                   layers=[Linear(), Tanh(), plant],
                   W_init_params={"coeff": 0.1},
                   W_rec_params={"coeff": 0.1},
                   use_GPU=use_GPU,
                   rng=np.random.RandomState(0),
                   debug=False)

    rnn.run_batches(plant,
                    None,
                    HessianFree(CG_iter=20, init_damping=10),
                    max_epochs=150,
                    plotting=True,
                    print_period=None)

    outputs = rnn.forward(plant, rnn.W)

    try:
        assert rnn.loss.batch_loss(outputs, targets) < 1e-2
    except AssertionError:
        plt.figure()
        plt.plot(outputs[-1][:, :, 0].squeeze().T)
        plt.plot(outputs[-1][:, :, 1].squeeze().T)
        plt.title("outputs")
        plt.savefig("test_plant_outputs.png")

        raise