Example #1
    def test_list_num_iters(self):

        lc = LearningCurve(self.fpath)
        lc.parse()
        x = lc.list('NumIters')
        dx = np.diff(x)
        assert_true(np.all(dx > 0))
Example #4
    def test_list_invalid_key(self):

        lc = LearningCurve(self.fpath)
        lc.parse()
        assert_raises(KeyError, lc.list, 'wrong-key', phase=Phase.TRAIN)
        assert_raises(KeyError, lc.list, 'wrong-key', phase=Phase.TEST)
        assert_raises(KeyError, lc.list, 'accuracy', phase=Phase.TRAIN)
Example #6
    def test_keys_parsed(self):

        lc = LearningCurve(self.fpath)
        train_keys, test_keys = lc.parse()
        assert_list_equal(train_keys,
                          ['NumIters', 'Seconds', 'LearningRate', 'loss'])
        assert_list_equal(
            test_keys,
            ['NumIters', 'Seconds', 'LearningRate', 'accuracy', 'loss'])
Example #7
    def test_list(self):

        lc = LearningCurve(self.fpath)
        lc.parse()
        x = lc.list('NumIters')
        assert_greater(x.size, 0)
        loss = lc.list('loss')
        assert_equal(x.shape, loss.shape)
        acc = lc.list('accuracy')
        assert_equal(x.shape, acc.shape)
Example #8
    def test_list_loss_acc(self):

        lc = LearningCurve(self.fpath)
        lc.parse()
        loss = lc.list('loss')
        acc = lc.list('accuracy')
        assert_equal(loss.shape, acc.shape)
        assert_false(np.all(loss == acc))
Example #12
    def test_name(self):

        lc = LearningCurve(self.fpath)
        assert_is_instance(lc.name(), str)
        assert_greater(len(lc.name()), 0, 'name is empty')
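
Taken together, these tests outline the parser's public surface: construct LearningCurve with the path of a training log, call parse() to get the per-phase key lists, pull aligned NumPy arrays with list(key) (optionally scoped with phase=Phase.TRAIN or phase=Phase.TEST), and use name() as a run label. Below is a minimal usage sketch mirroring only the calls exercised above; the log path and the matplotlib plotting are illustrative assumptions, and the import location of LearningCurve and Phase is not shown in these examples.

    import matplotlib.pyplot as plt

    # LearningCurve and Phase come from the package under test;
    # their module path is not shown in these examples.
    lc = LearningCurve('train.log')      # hypothetical log path
    train_keys, test_keys = lc.parse()   # per-phase keys, e.g. 'accuracy' only under TEST

    iters = lc.list('NumIters')          # strictly increasing iteration numbers
    loss = lc.list('loss')
    acc = lc.list('accuracy')            # same shape as iters and loss

    plt.plot(iters, loss, label='loss')
    plt.plot(iters, acc, label='accuracy')
    plt.title(lc.name())                 # non-empty string naming the run
    plt.legend()
    plt.show()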
Example #13
env.reset()
# Take one random step to get the pole and cart moving
obs, rew, done, _ = env.step(env.action_space.sample())
reward = reward_func(obs, rew)
state = state_func(obs)

memory = Memory(max_size=memory_size)

if gym_env_name == 'CartPole-v0':
    max_util = 200
else:
    max_util = 500

curve = LearningCurve(plots=[('utility', 'left', 'r'),
                             ('epsilon', 'right', 'b')],
                      episode_range=1000,
                      min_y_left=0,
                      max_y_left=max_util)

# Make a bunch of random actions and store the experiences
for ii in range(pretrain_length):
    # Uncomment the line below to watch the simulation
    # env.render()

    # Make a random action
    action = env.action_space.sample()
    obs, rew, done, _ = env.step(action)
    reward = reward_func(obs, rew)
    next_state = state_func(obs)

    if done:
        # Assumed continuation; the original snippet is truncated at this point.
        # Store the terminal experience, then restart and re-seed the environment.
        next_state = np.zeros(state.shape)
        memory.add((state, action, reward, next_state))
        env.reset()
        obs, rew, done, _ = env.step(env.action_space.sample())
        reward = reward_func(obs, rew)
        state = state_func(obs)
    else:
        # Store the transition and continue from the new state
        memory.add((state, action, reward, next_state))
        state = next_state
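
Only the Memory(max_size=memory_size) constructor appears in this snippet; the class itself is defined elsewhere. Purely as an assumption (the add and sample method names are not confirmed here), such an experience-replay buffer is commonly a fixed-size deque of (state, action, reward, next_state) tuples:

    import numpy as np
    from collections import deque

    class Memory:
        """Hypothetical replay buffer matching the Memory(max_size=...) call above."""

        def __init__(self, max_size=1000):
            self.buffer = deque(maxlen=max_size)   # oldest experiences fall off the end

        def add(self, experience):
            # experience: assumed (state, action, reward, next_state) tuple
            self.buffer.append(experience)

        def sample(self, batch_size):
            # Uniform random minibatch without replacement
            idx = np.random.choice(len(self.buffer), size=batch_size, replace=False)
            return [self.buffer[i] for i in idx]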
Example #14
    def q_learning(self,
                   gamma=0.9,
                   alpha=0.3,
                   episodes=100,
                   max_steps=50,
                   fps=30,
                   epsilon_0=-1.0,
                   plot=False):
        '''Q-learning loop with interactive keys:
        q: toggle view (state values or Q values)
        s: toggle speed (slow or fast)
        e: toggle exploration on or off
        '''

        if plot:
            l_curve = LearningCurve(min_y=-1.5, max_y=1.5)

        self.state_values, self.state_q_values = self.init_values()
        flag_q = False
        flag_fast = False
        flag_exit = False
        flag_explore = True

        episode = 0
        while (True):
            # for episode in range(episodes):
            if flag_explore:
                if epsilon_0 >= 0.0:
                    epsilon = epsilon_0
                else:
                    epsilon = np.exp(-episode / (episodes / 5))
            else:
                epsilon = 0.0
            state = random.choice(self.states)
            done = False
            explore = False
            action = ''
            utility = 0.0
            reward = 0.0
            for step in range(max_steps):
                # while(True):
                self.r_draw_background()
                if not flag_q:
                    self.r_draw_values()
                else:
                    self.r_draw_q_values()
                self.r_draw_agent(state)
                self.r_draw_reward(reward, utility, done)
                # self.r_draw_rl_metrics(f'{episode+1}/{episodes}', epsilon, action, explore)
                self.r_draw_rl_metrics(episode + 1, epsilon, action, explore)
                pygame.display.flip()
                if flag_fast:
                    key = self.tick_key(fps)
                else:
                    key = self.tick_key(1)
                if key == pygame.K_q:
                    flag_q = not flag_q
                elif key == pygame.K_s:
                    flag_fast = not flag_fast
                elif key == pygame.K_e:
                    flag_explore = not flag_explore
                elif key == pygame.K_ESCAPE:
                    flag_exit = True
                    break
                if done:
                    break
                if np.random.uniform() < epsilon:
                    explore = True
                    action = random.choice(self.allowed_actions[state])
                else:
                    explore = False
                    action = self.policy[state]
                new_state, reward, _, done = self.step(state, action)
                if done:
                    sample = reward
                else:
                    sample = reward + gamma * self.max_val(
                        self.state_q_values[new_state])
                self.state_q_values[state][action] = (
                    1 - alpha
                ) * self.state_q_values[state][action] + alpha * sample
                self.policy[state], self.state_values[state] = self.key_max(
                    self.state_q_values[state])
                utility += (gamma**step) * reward
                state = new_state
            if plot:
                l_curve.add_sample(episode, utility)
            if flag_exit:
                break
            episode += 1
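
The update applied after each transition above is the standard tabular Q-learning rule with learning rate alpha and discount gamma:

    Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (r + gamma * max_a' Q(s', a'))

On terminal transitions the sample reduces to the reward alone. The greedy policy and state value for s are then refreshed from the updated Q row, and unless a fixed epsilon_0 >= 0 is supplied, epsilon decays per episode as exp(-episode / (episodes / 5)), so exploration fades over roughly the configured number of episodes.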