Example #1
    @classmethod
    def setUpClass(cls):
        """Initializes a simple RNN instance for testing."""

        n_in = 2
        n_hidden = 8
        n_out = 2

        W_in = np.ones((n_hidden, n_in))
        W_rec = np.eye(n_hidden)
        W_out = np.ones((n_out, n_hidden))

        b_rec = np.ones(n_hidden)
        b_out = np.ones(n_out)

        alpha = 0.6

        cls.rnn = RNN(W_in,
                      W_rec,
                      W_out,
                      b_rec,
                      b_out,
                      activation=tanh,
                      alpha=alpha,
                      output=softmax,
                      loss=softmax_cross_entropy)
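For reference, a minimal sketch of the forward dynamics this fixture configures. The leaky-update form below is an assumption about core.RNN (a common convention for networks with an alpha time constant), not something the snippet itself guarantees:

import numpy as np

# Assumed leaky update (an assumption, not taken from the repo):
#   a_t = (1 - alpha) * a_{t-1} + alpha * tanh(W_rec a_{t-1} + W_in x_t + b_rec)
n_in, n_hidden = 2, 8
W_in = np.ones((n_hidden, n_in))
W_rec = np.eye(n_hidden)
b_rec = np.ones(n_hidden)
alpha = 0.6

a = np.zeros(n_hidden)
x = np.ones(n_in)
a = (1 - alpha) * a + alpha * np.tanh(W_rec @ a + W_in @ x + b_rec)
# With uniform weights and inputs, every unit receives the same drive.
assert np.allclose(a, a[0])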
Example #2
    @classmethod
    def setUpClass(cls):
        """Initializes a small identity RNN and a fixed feedback matrix."""

        cls.W_in = np.eye(2)
        cls.W_rec = np.eye(2)
        cls.W_out = np.eye(2)
        # Feedback matrix with zeros on the diagonal and -1 elsewhere.
        cls.W_FB = -np.ones((2, 2)) + np.eye(2)
        cls.b_rec = np.zeros(2)
        cls.b_out = np.zeros(2)

        cls.rnn = RNN(cls.W_in,
                      cls.W_rec,
                      cls.W_out,
                      cls.b_rec,
                      cls.b_out,
                      activation=identity,
                      alpha=1,
                      output=softmax,
                      loss=softmax_cross_entropy)

        # Hand-set the network state and output error used by the tests.
        cls.rnn.a = np.ones(2)
        cls.rnn.error = np.ones(2) * 0.5
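A hypothetical check that a test built on this fixture could make by hand. Using W_FB as a random-feedback credit pathway is an assumption for illustration; the fixture itself only defines the matrix and the hand-set error:

import numpy as np

W_FB = -np.ones((2, 2)) + np.eye(2)   # [[0., -1.], [-1., 0.]]
error = np.ones(2) * 0.5

# Feedback-projected error, as a feedback-alignment-style rule would use it.
q = W_FB.dot(error)
assert np.allclose(q, [-0.5, -0.5])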
Example #3
    def test_mimic_task(self):
        """Verifies that the proper RNN output is returned as label in a simple
        case where the RNN simply counts the number of time steps."""

        from core import RNN
        from functions import identity, mean_squared_error

        n_in = 2
        n_h = 2
        n_out = 2

        W_in_target = np.eye(n_in)
        W_rec_target = np.eye(n_h)
        W_out_target = np.eye(n_out)
        b_rec_target = np.zeros(n_h)
        b_out_target = np.zeros(n_out)

        alpha = 1

        rnn_target = RNN(W_in_target,
                         W_rec_target,
                         W_out_target,
                         b_rec_target,
                         b_out_target,
                         activation=identity,
                         alpha=alpha,
                         output=identity,
                         loss=mean_squared_error)

        task = Mimic_RNN(rnn_target, p_input=1, tau_task=1)
        data = task.gen_data(100, 0)

        y = np.arange(1, 101)
        y_correct = np.array([y, y]).T

        self.assertTrue(np.isclose(data['train']['Y'], y_correct).all())
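Why arange(1, 101) is the right label: with identity activation, identity weights, alpha = 1, and zero biases, each step adds the input to the state, so the state (and hence the identity readout) counts time steps. A hand-rolled version of that arithmetic; the leaky-update form and the reading of p_input=1 as "every input is 1" are assumptions about the repo's conventions:

import numpy as np

a = np.zeros(2)
labels = []
for t in range(100):
    x = np.ones(2)            # p_input=1 -> input of 1 at every step (assumed)
    a = a + x                 # identity activation/weights, alpha = 1, zero biases
    labels.append(a.copy())   # identity readout with zero output bias

assert np.allclose(labels, np.array([np.arange(1, 101)] * 2).T)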
Example #4
    def test_kernl_reduce_rflo(self):
        """Verifies that KeRNL reduces to RFLO in special case.

        If beta is initialized to the identity while the gammas are all
        initialized to the network inverse time constant alpha, and the KeRNL
        optimizer has 0 learning rate (i.e. beta and gamma do not change), then
        KeRNL should produce the same gradients as RFLO if the approximate
        KeRNL of (1 - alpha) (rather than exp(-alpha)) is used."""

        self.task = Add_Task(4, 6, deterministic=True, tau_task=2)
        self.data = self.task.gen_data(100, 0)

        alpha = 0.3

        # RFLO
        np.random.seed(1)
        self.rnn_1 = RNN(self.W_in,
                         self.W_rec,
                         self.W_out,
                         self.b_rec,
                         self.b_out,
                         activation=tanh,
                         alpha=alpha,
                         output=softmax,
                         loss=softmax_cross_entropy)
        self.optimizer_1 = Stochastic_Gradient_Descent(lr=0.001)
        self.learn_alg_1 = RFLO(self.rnn_1, alpha)
        # KeRNL with beta and gamma fixed to RFLO values
        np.random.seed(1)
        self.rnn_2 = RNN(self.W_in,
                         self.W_rec,
                         self.W_out,
                         self.b_rec,
                         self.b_out,
                         activation=tanh,
                         alpha=alpha,
                         output=softmax,
                         loss=softmax_cross_entropy)
        self.optimizer_2 = Stochastic_Gradient_Descent(lr=0.001)
        self.KeRNL_optimizer = Stochastic_Gradient_Descent(lr=0)
        A = np.eye(self.rnn_2.n_h)
        alpha_i = np.ones(self.rnn_2.n_h) * alpha
        self.learn_alg_2 = KeRNL(self.rnn_2,
                                 self.KeRNL_optimizer,
                                 A=A,
                                 alpha=alpha_i)

        monitors = []

        np.random.seed(2)
        self.sim_1 = Simulation(self.rnn_1)
        self.sim_1.run(self.data,
                       learn_alg=self.learn_alg_1,
                       optimizer=self.optimizer_1,
                       monitors=monitors,
                       verbose=False)

        np.random.seed(2)
        self.sim_2 = Simulation(self.rnn_2)
        self.sim_2.run(self.data,
                       learn_alg=self.learn_alg_2,
                       optimizer=self.optimizer_2,
                       monitors=monitors,
                       verbose=False)

        # Assert that the networks learned the same weights.
        assert_allclose(self.rnn_1.W_rec, self.rnn_2.W_rec)
        # Assert that the networks' parameters changed appreciably, despite
        # a large tolerance for closeness.
        self.assertFalse(np.isclose(self.W_rec, self.rnn_2.W_rec).all())
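Roughly why the reduction holds, as a sketch under the textbook definitions of the two algorithms (the exact conventions of this repo's RFLO and KeRNL classes are assumptions). RFLO low-pass filters the immediate influence of each recurrent weight with the network leak,

    B^t = (1 - \alpha) B^{t-1} + \alpha \phi'(h^t) \hat{a}^{t-1,T},

and updates W_rec in proportion to \delta^t B^t. KeRNL keeps an analogous per-unit trace whose decay rates \gamma_i are learned, and routes credit through a learned sensitivity matrix \beta, giving a gradient estimate of the form \sum_k \delta^t_k \beta_{ki} e^t_{ij}. With \beta fixed to the identity and every \gamma_i fixed to \alpha (which the zero-learning-rate KeRNL optimizer guarantees), the two estimates coincide, provided KeRNL decays its trace by the first-order factor (1 - \gamma_i) rather than exp(-\gamma_i). That is exactly the configuration A = np.eye(...), alpha_i = alpha, and lr=0 set up above.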
Example #5
    def test_small_lr_case(self):
        """Verifies that RTRL, Future_BPTT, and Efficient_BPTT produce nearly
        identical weight updates when the learning rate is small."""

        alpha = 1

        self.rnn_1 = RNN(self.W_in,
                         self.W_rec,
                         self.W_out,
                         self.b_rec,
                         self.b_out,
                         activation=tanh,
                         alpha=alpha,
                         output=softmax,
                         loss=softmax_cross_entropy)

        self.rnn_2 = RNN(self.W_in,
                         self.W_rec,
                         self.W_out,
                         self.b_rec,
                         self.b_out,
                         activation=tanh,
                         alpha=alpha,
                         output=softmax,
                         loss=softmax_cross_entropy)

        self.rnn_3 = RNN(self.W_in,
                         self.W_rec,
                         self.W_out,
                         self.b_rec,
                         self.b_out,
                         activation=tanh,
                         alpha=alpha,
                         output=softmax,
                         loss=softmax_cross_entropy)

        lr = 0.00001
        self.optimizer_1 = Stochastic_Gradient_Descent(lr=lr)
        self.learn_alg_1 = RTRL(self.rnn_1)
        self.optimizer_2 = Stochastic_Gradient_Descent(lr=lr)
        self.learn_alg_2 = Future_BPTT(self.rnn_2, 25)
        self.optimizer_3 = Stochastic_Gradient_Descent(lr=lr)
        self.learn_alg_3 = Efficient_BPTT(self.rnn_3, 100)

        monitors = []

        np.random.seed(1)
        self.sim_1 = Simulation(self.rnn_1)
        self.sim_1.run(self.data,
                       learn_alg=self.learn_alg_1,
                       optimizer=self.optimizer_1,
                       monitors=monitors,
                       verbose=False)

        np.random.seed(1)
        self.sim_2 = Simulation(self.rnn_2)
        self.sim_2.run(self.data,
                       learn_alg=self.learn_alg_2,
                       optimizer=self.optimizer_2,
                       monitors=monitors,
                       verbose=False)

        np.random.seed(1)
        self.sim_3 = Simulation(self.rnn_3)
        self.sim_3.run(self.data,
                       learn_alg=self.learn_alg_3,
                       optimizer=self.optimizer_3,
                       monitors=monitors,
                       verbose=False)

        # Assert that the networks learned similar weights, within a small
        # tolerance...
        assert_allclose(self.rnn_1.W_rec, self.rnn_2.W_rec, atol=1e-4)
        assert_allclose(self.rnn_2.W_rec, self.rnn_3.W_rec, atol=1e-4)
        # ...but that the weights moved appreciably from their initial values.
        self.assertFalse(
            np.isclose(self.rnn_1.W_rec, self.W_rec, atol=1e-4).all())
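Why three different credit-assignment schemes land on nearly the same weights: RTRL computes the exact gradient online, while Future_BPTT and Efficient_BPTT approximate it over 25- and 100-step truncation horizons. With lr = 1e-5, each step barely moves the weights, so all three trajectories stay close together and the accumulated updates agree to first order in the learning rate,

    \Delta W \approx -lr \sum_t \nabla_W L_t + O(lr^2),

which is why assert_allclose passes at atol=1e-4 while the learned weights still differ measurably from the initial self.W_rec. (The first-order argument is a standard one, not something the repo states.)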