Example #1
    def testFifo(self):
        """
        testing fifo and _update_state method in VectorRegressionWithVariance
        """
        print("\n * testing fifo and update_state method "
              "in VectorRegressionWithVariance \n")
        in_dim = 3
        order = 3

        len_ts = 10

        model = VectorRegressionWithVariance(in_dim, in_dim, order)
        random = amath.random.RandomState(0)
        in_patterns = amath.random.uniform(size=(len_ts, in_dim))
        fifo_test = amath.zeros((order, in_dim))
        for i in range(len_ts):
            self.assertTrue(amath.allclose(model.fifo.to_array(), fifo_test))
            model.learn_one_step(in_patterns[i])
            popped_in_pattern = model._update_state(in_patterns[i])
            if i < order:
                self.assertTrue(
                    amath.allclose(popped_in_pattern, amath.zeros(in_dim)))
            else:
                self.assertTrue(
                    amath.allclose(popped_in_pattern, in_patterns[i - order]))
            fifo_test[1:] = fifo_test[:-1]
            fifo_test[0] = in_patterns[i]
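
The fifo_test array above is the reference the assertions compare against: on each step the newest input pattern is written to slot 0 and older entries shift toward the tail, so after order steps the pattern that pops out is the one seen order steps earlier. A minimal NumPy-only sketch of that shift (numpy stands in for amath here):

import numpy as np

order, in_dim, len_ts = 3, 3, 6
fifo = np.zeros((order, in_dim))
for pattern in np.random.uniform(size=(len_ts, in_dim)):
    popped = fifo[-1].copy()   # the pattern that falls off the tail
    fifo[1:] = fifo[:-1]       # shift older patterns toward the tail
    fifo[0] = pattern          # newest pattern occupies slot 0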
Example #2
    def testFifo(self):
        """
        testing fifo and _update_state method in MultiTargetVectorRegression
        """
        print("\n * testing fifo and update_state method "
              "in MultiTargetVectorRegression \n")
        in_dim = 3
        out_dims = [2, 4]
        SGDs = [AdaGrad(), AdaGrad()]
        order = 3

        len_ts = 10

        model = MultiTargetVectorRegression(in_dim, out_dims, SGDs, order)
        random = amath.random.RandomState(0)
        in_patterns = amath.random.uniform(size=(len_ts, in_dim))
        out_pattern_0 = amath.random.uniform(size=(len_ts, out_dims[0]))
        out_pattern_1 = amath.random.uniform(size=(len_ts, out_dims[1]))
        fifo_test = amath.zeros((order, in_dim))
        for i in range(len_ts):
            self.assertTrue(
                amath.allclose(model.layers[0].fifo.to_array(), fifo_test))
            model.learn_one_step([out_pattern_0[i], out_pattern_1[i]])
            popped_in_pattern = model._update_state(in_patterns[i])
            if i < order:
                self.assertTrue(
                    amath.allclose(popped_in_pattern, amath.zeros(in_dim)))
            else:
                self.assertTrue(
                    amath.allclose(popped_in_pattern, in_patterns[i - order]))
            fifo_test[1:] = fifo_test[:-1]
            fifo_test[0] = in_patterns[i]
Example #3
    def test_update_state(self):
        print("""
        ---------------------------------
            test updating internal states
        ---------------------------------
        """)
        anc_test = amath.random.uniform(self.low, self.high,
                                        (self.n_anc, self.dim))
        model = FunctionalDyBM(
            self.dim, anc_test, self.delay, self.decay_rates)
        loc = amath.random.uniform(self.low, self.high, (self.n_obs, self.dim))

        for i in range(10):
            pattern = amath.zeros(self.n_obs)
            pattern[i % self.n_obs] = 1
            fifo_old = model.fifo[:-1]
            fifo_last = model.fifo[-1]
            e_trace_old = model.e_trace

            model._update_state(pattern, loc)

            # check fifo update
            self.assertTrue(amath.allclose(model.fifo[1:], fifo_old))

            # check e_trace update
            e_trace_new = amath.zeros((self.n_elg, self.n_anc)) + fifo_last
            for k in range(e_trace_new.shape[0]):
                e_trace_new[k, :] = e_trace_new[k, :] \
                    + self.decay_rates[k] * e_trace_old[k, :]
            self.assertTrue(amath.allclose(model.e_trace, e_trace_new))

            # check mu_anc update
            self.assertTrue(amath.allclose(model.mu, amath.zeros(self.n_anc)))

        print("mu(P) = {}".format(model.mu))
        return 0
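
The per-row loop above checks the eligibility-trace recursion: each trace row decays by its own rate and accumulates the entry that just left the FIFO. The same update can be written in one vectorized line; a NumPy sketch (sizes and values are made up, numpy stands in for amath):

import numpy as np

n_elg, n_anc = 2, 4                       # assumed sizes for illustration
decay_rates = np.array([0.5, 0.9])
e_trace = np.zeros((n_elg, n_anc))
fifo_last = np.ones(n_anc)                # entry leaving the FIFO tail

# equivalent to the per-k loop in the test
e_trace = fifo_last + decay_rates[:, None] * e_trace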
Example #4
    def test_u_tilde(self):
        """ check _get_u_tilde
        """
        in_patterns = [amath.array([1, 0, 0, 0]),
                       amath.array([0, 1, 0, 0]),
                       amath.array([0, 0, 1, 0])]
        self.model._update_state(in_patterns[2])
        self.model._update_state(in_patterns[1])
        self.model._update_state(in_patterns[0])
        u_tilde_test = amath.zeros(self.dim_hidden)
        for d in range(self.order):
            u_tilde_test += self.model.variables["U"][d, d, :]
        u_tilde_test += self.model.variables["b_h"]
        self.assertTrue(amath.allclose(u_tilde_test, self.model._get_u_tilde()))
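
The expected value assembled in the loop above is just the diagonal slices of the weight tensor U summed over the delay axis, plus the hidden bias b_h. A vectorized NumPy sketch of the same reduction (sizes are made up; numpy stands in for amath):

import numpy as np

order, dim_hidden = 3, 5          # assumed sizes for illustration
U = np.random.uniform(size=(order, order, dim_hidden))
b_h = np.random.uniform(size=dim_hidden)

# same value the test's loop accumulates: sum_d U[d, d, :] + b_h
u_tilde = U[np.arange(order), np.arange(order), :].sum(axis=0) + b_h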
Example #5
def experiment(period,
               std,
               delay,
               decay,
               Nh,
               repeat,
               bidirectional,
               sigma=0.01):
    """
    A single run of the experiment.

    Parameters
    ----------
    period : int
        period of the wave
    std : float
        standard deviation of noise
    delay : int
        conduction delay
    decay : list
        list of decay rates
    Nh : int
        number of hidden units
    repeat : int
        number of iterations of training
    bidirectional : boolean
        whether to train bidirectionally
    sigma : float
        std of random initialization
        0.01 is recommended in
        https://www.cs.toronto.edu/~hinton/absps/guideTR.pdf
    """
    """
    Prepare data generators
    """

    dim = 1  # dimension of the wave
    phase = amath.zeros(dim)  # initial phase of the wave

    # forward sequence
    wave = NoisySawtooth(0, period, std, dim, phase, False)
    wave.reset(seed=0)

    # backward sequence
    revwave = NoisySawtooth(0, period, std, dim, phase, True)
    revwave.reset(seed=1)
    """
    Prepare a Gaussian Bernoulli DyBM
    """

    Nv = dim  # number of visible units

    sgd = AdaGrad()
    dybm = GaussianBernoulliDyBM([delay, delay], [decay, decay], [Nv, Nh],
                                 [sgd, deepcopy(sgd)],
                                 sigma=sigma,
                                 insert_to_etrace="w_delay")
    dybm.layers[0].layers[1].SGD.set_learning_rate(0)
    dybm.layers[1].layers[1].SGD.set_learning_rate(0)
    """
    Learn
    """
    error = list()  # list of numpy arrays
    bi_end = 0.5
    bi_factor = 2
    for i in range(repeat):
        # update internal states by reading forward sequence
        wave.add_length(period)
        dybm.get_predictions(wave)

        if bidirectional and i % (bi_factor + 1) == 0 and bi_factor > 0 \
           and i < repeat * bi_end:
            # make a time-reversed DyBM
            dybm._time_reversal()

            # update internal states by reading backward sequence
            revwave.add_length(period)
            dybm.get_predictions(revwave)

            # learn backward sequence for one period
            revwave.add_length(period)
            dybm.learn(revwave, get_result=False)

            # make a non time-reversed DyBM
            dybm._time_reversal()
        else:
            # update internal states by reading forward sequence
            wave.add_length(period)
            dybm.get_predictions(wave)

            # learn forward sequence
            wave.add_length(period)
            result = dybm.learn(wave, get_result=True)

            if i % (bi_factor + 1) == bi_factor:
                rmse = RMSE(result["actual"], result["prediction"])
                rmse = amath.to_numpy(rmse)
                error.append(rmse)

    return error, dybm, wave
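
The branching inside the loop above implements a simple schedule: when bidirectional training is enabled, one out of every bi_factor + 1 iterations time-reverses the DyBM and trains on the backward sequence, but only during the first half of training (i < repeat * bi_end); the RMSE is recorded only on the last forward iteration of each cycle. A small stand-alone sketch that just prints this schedule (assuming bidirectional=True and the values used above):

repeat, bi_factor, bi_end = 12, 2, 0.5

for i in range(repeat):
    if i % (bi_factor + 1) == 0 and i < repeat * bi_end:
        phase = "backward (time-reversed) pass"
    else:
        phase = "forward pass"
        if i % (bi_factor + 1) == bi_factor:
            phase += ", RMSE recorded"
    print(i, phase)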
Example #6
    def test(self):
        env = FourArmedBandit()
        observation = env.reset()
        action_dim = env.action_space.n
        state_dim = env.observation_space.n

        print(
            '\n\nUnit testing DySARSA RL Agent on four armed bandit problem\n')
        print('action dim is :', action_dim)
        print('state_dim is :', state_dim)

        print('Number of bandits: ', env.n_bandits)
        """
        DySARSA model parameters

        """
        delay = 2
        decay = [0.2]
        discount = 0.99
        temperature = 0.1

        SGD = ADAM()
        learning_rate = 0.01

        init_epsilon = 0.3  # initial exploration term (epsilon-greedy)
        final_epsilon = 0.1  # final exploration term (when using annealing)

        steps_per_episode = 10
        train_steps = 1000
        num_bandits = env.n_bandits
        total_reward = amath.zeros(num_bandits)

        amath.random.seed(7)
        """Create DySARSA function approximator model"""
        DySARSA_model = DYSARSA(state_dim,
                                action_dim,
                                delay,
                                decay,
                                discount,
                                SGD,
                                learning_rate,
                                temperature,
                                insert_to_etrace="w_delay",
                                L1=0.00)

        print(
            '\nTraining new agent with DySARSA RL agent and Boltzmann policy')

        agent = DySARSAAgent(env=env,
                             model=DySARSA_model,
                             steps_per_episode=steps_per_episode,
                             train_steps=train_steps,
                             exploration="Boltzmann",
                             init_epsilon=temperature,
                             final_epsilon=final_epsilon,
                             UseQLearning=False,
                             frame_skip=False,
                             suppress_print=True)

        agent.fit(test_every=2, test_num_eps=5, break_reward=100, render=False)

        print('\nTesting DySARSA RL agent with Boltzmann Policy')

        agent = DySARSAAgent(env=env, model=DySARSA_model, suppress_print=True)

        for i in range(5):
            reward = agent.predict(render=True)
            total_reward[agent.action] += reward

        print("\nAverage reward predicted for each arm is: ", total_reward / 5)

        print("The agent thinks bandit " + str(agent.action + 1) +
              " is the most rewarding....")
        if agent.action == num_bandits - 1:
            print("and the prediction is correct!")
        else:
            print("and the prediction is incorrect!")

        print("\n************************************")

        self.assertEqual(agent.action, num_bandits - 1)

        total_reward = amath.zeros(num_bandits)

        print(
            '\nTraining new agent with DySARSA RL agent and epsilon-greedy policy'
        )

        agent = DySARSAAgent(env=env,
                             model=DySARSA_model,
                             steps_per_episode=steps_per_episode,
                             train_steps=train_steps,
                             exploration="greedy",
                             init_epsilon=init_epsilon,
                             final_epsilon=final_epsilon,
                             UseQLearning=False,
                             frame_skip=False,
                             suppress_print=True)

        agent.fit(test_every=2, test_num_eps=5, break_reward=100, render=False)

        print('\nTesting DySARSA RL agent with epsilon-greedy Policy')

        agent = DySARSAAgent(env=env, model=DySARSA_model, suppress_print=True)

        for i in range(5):
            reward = agent.predict(render=True)
            total_reward[agent.action] += reward

        print("\nAverage reward predicted for each arm is: ", total_reward / 5)

        print("The agent thinks bandit " + str(agent.action + 1) +
              " is the most rewarding....")
        if agent.action == num_bandits - 1:
            print("and the prediction is correct!")
        else:
            print("and the prediction is incorrect!")

        self.assertEqual(agent.action, num_bandits - 1)
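
The two agents above differ only in their exploration policy: a Boltzmann (softmax) policy driven by the temperature, and an epsilon-greedy policy driven by init_epsilon/final_epsilon. As a point of reference, here is a generic sketch of the two selection rules over a vector of Q-values (this is not the DySARSAAgent implementation, just the standard definitions):

import numpy as np

def boltzmann_action(q_values, temperature=0.1):
    # softmax over Q-values; lower temperature makes the choice greedier
    logits = np.asarray(q_values, dtype=float) / temperature
    probs = np.exp(logits - logits.max())
    probs /= probs.sum()
    return int(np.random.choice(len(q_values), p=probs))

def epsilon_greedy_action(q_values, epsilon=0.3):
    # with probability epsilon pick a random action, otherwise the best one
    if np.random.rand() < epsilon:
        return int(np.random.randint(len(q_values)))
    return int(np.argmax(q_values))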