Example #1
sess = tf.Session()

actor = Actor(sess, n_features=n_features, n_actions=n_actions, lr=lr_actor)
critic = Critic(sess, n_features=n_features, lr=lr_critic)

sess.run(tf.global_variables_initializer())

for i_episode in range(MAX_EPISODE):
    _, state = env.reset()
    step = 0
    track_r = []
    while True:
        # the actor samples an action from its current policy
        action = actor.choose_action(state)
        _, next_state, reward, done = env.step(action)
        env.render()
        track_r.append(reward)

        # the critic evaluates the transition and returns the TD error,
        # which the actor then uses as the advantage for its policy update
        td_error = critic.learn(state, reward, next_state)
        actor.learn(state, action, td_error)
        state = next_state
        step += 1

        if done or step >= MAX_EP_STEPS:
            # track an exponential moving average of the episode return
            ep_rs_sum = sum(track_r)
            if 'running_reward' not in globals():
                running_reward = ep_rs_sum
            else:
                running_reward = running_reward * 0.95 + ep_rs_sum * 0.05
            print("episode:", i_episode, "step:", step, "  reward:", int(running_reward))
            break
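
This snippet assumes that env, n_features, n_actions, lr_actor, lr_critic, MAX_EPISODE and MAX_EP_STEPS are defined earlier, and that Actor and Critic classes exist with the methods called above. A minimal TF1-style sketch of such classes (single hidden layer; the layer size, discount factor and 1-D state handling are illustrative assumptions, not the project's actual values):

import numpy as np
import tensorflow as tf

class Actor:
    def __init__(self, sess, n_features, n_actions, lr=0.001):
        self.sess = sess
        self.s = tf.placeholder(tf.float32, [1, n_features], "state")
        self.a = tf.placeholder(tf.int32, None, "action")
        self.td_error = tf.placeholder(tf.float32, None, "td_error")

        hidden = tf.layers.dense(self.s, 20, tf.nn.relu)  # hidden size assumed
        self.acts_prob = tf.layers.dense(hidden, n_actions, tf.nn.softmax)

        # policy-gradient objective: log pi(a|s) scaled by the TD error
        log_prob = tf.log(self.acts_prob[0, self.a])
        self.exp_v = tf.reduce_mean(log_prob * self.td_error)
        self.train_op = tf.train.AdamOptimizer(lr).minimize(-self.exp_v)

    def choose_action(self, s):
        # sample an action from the softmax policy (state assumed to be a 1-D array)
        probs = self.sess.run(self.acts_prob, {self.s: s[np.newaxis, :]})
        return np.random.choice(np.arange(probs.shape[1]), p=probs.ravel())

    def learn(self, s, a, td):
        feed = {self.s: s[np.newaxis, :], self.a: a, self.td_error: td}
        self.sess.run(self.train_op, feed)

class Critic:
    def __init__(self, sess, n_features, lr=0.01, gamma=0.9):  # gamma assumed
        self.sess = sess
        self.gamma = gamma
        self.s = tf.placeholder(tf.float32, [1, n_features], "state")
        self.v_next = tf.placeholder(tf.float32, [1, 1], "v_next")
        self.r = tf.placeholder(tf.float32, None, "reward")

        hidden = tf.layers.dense(self.s, 20, tf.nn.relu)
        self.v = tf.layers.dense(hidden, 1)

        # TD error: r + gamma * V(s') - V(s); its square is the value-net loss
        self.td_error = self.r + self.gamma * self.v_next - self.v
        self.train_op = tf.train.AdamOptimizer(lr).minimize(tf.square(self.td_error))

    def learn(self, s, r, s_next):
        v_next = self.sess.run(self.v, {self.s: s_next[np.newaxis, :]})
        td_error, _ = self.sess.run([self.td_error, self.train_op],
                                    {self.s: s[np.newaxis, :],
                                     self.v_next: v_next, self.r: r})
        return td_error
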
Example #2
    grid_world.create_grid_ui(grid_world.m, grid_world.n,
                              (grid_world.start_x, grid_world.start_y),
                              (grid_world.end_x, grid_world.end_y),
                              grid_world.obstacles)

    agent = SARSAgent(actions=list(range(grid_world.action_size)))
    number_of_episodes = 10
    for episode in range(number_of_episodes):
        # reset the environment and get the initial state
        state = grid_world.reset()
        # choose the first action for that state from the agent
        action = agent.get_action(str(state))

        while True:
            grid_world.render()

            # take action and proceed one step in the environment
            next_state, reward, done = grid_world.step(action)
            next_action = agent.get_action(str(next_state))

            # with sample <s,a,r,s',a'>, agent learns new q function
            agent.learn(str(state), action, reward, str(next_state),
                        next_action)

            state = next_state
            action = next_action

            # print q function of all states at screen
            #env.print_value_all(agent.q_table)

            # if the episode is finished, break and start the next one
            if done:
                break
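
The SARSAgent implementation is not shown in this example. A minimal tabular sketch, assuming an epsilon-greedy get_action and the on-policy SARSA update in learn (the learning rate, discount factor and epsilon below are illustrative, not the project's actual values):

import random
from collections import defaultdict

class SARSAgent:
    def __init__(self, actions, learning_rate=0.01, discount_factor=0.9, epsilon=0.1):
        self.actions = actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        # Q-table: state string -> list of action values, initialized to zero
        self.q_table = defaultdict(lambda: [0.0] * len(self.actions))

    def get_action(self, state):
        # epsilon-greedy: explore with probability epsilon, otherwise act greedily
        if random.random() < self.epsilon:
            return random.choice(self.actions)
        q_values = self.q_table[state]
        max_q = max(q_values)
        best_actions = [a for a, q in enumerate(q_values) if q == max_q]
        return random.choice(best_actions)

    def learn(self, state, action, reward, next_state, next_action):
        # SARSA update: Q(s,a) <- Q(s,a) + alpha * [r + gamma * Q(s',a') - Q(s,a)]
        current_q = self.q_table[state][action]
        next_q = self.q_table[next_state][next_action]
        self.q_table[state][action] = current_q + self.learning_rate * (
            reward + self.discount_factor * next_q - current_q)

Because the target uses Q(s', a') for the action that will actually be taken next, the update is on-policy; Q-learning would instead bootstrap from the maximum of Q(s', a') over all actions.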