    t = 0
    track_r = []
    while True:
        if RENDER: env.render()

        a = actor.choose_action(s)

        s_, r, done, info = env.step(a)
        position, velocity = s_
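        # shaped reward: distance of the cart from x = -0.46, near the valley
        # bottom, replacing MountainCar's default -1 per step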
        r = abs(position + 0.46)

        track_r.append(r)

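        # one-step TD update: the critic evaluates the transition and the
        # actor is assumed to scale its log-prob gradient by the TD error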
        td_error = critic.learn(s, r, s_)
        actor.learn(s, a, td_error)

        s = s_
        t += 1

        if done:
            ep_rs_sum = sum(track_r)

            if 'running_reward' not in globals():
                running_reward = ep_rs_sum
            else:
                running_reward = running_reward * 0.95 + ep_rs_sum * 0.05
            if running_reward > 200:
                RENDER = True

            print("episode: ", i_episode, " reward: ", int(running_reward))
            break  # end the episode; without this, the loop keeps stepping a finished env
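
Both examples assume Actor and Critic classes in which critic.learn(s, r, s_) returns the one-step TD error and actor.learn(s, a, td_error) scales the policy gradient by it; those classes are not shown on this page. Below is a minimal sketch of that interface using linear function approximation with NumPy. GAMMA, the learning rates, and the linear models are assumptions for illustration, not the original implementation.

import numpy as np

GAMMA = 0.9  # discount factor (assumed)

class Critic:
    # Linear state-value critic, V(s) = w . s (sketch, not the original model)
    def __init__(self, n_features, lr=0.01):
        self.w = np.zeros(n_features)
        self.lr = lr

    def reset(self):
        # Example #2 calls critic.reset() each episode; a critic with
        # eligibility traces would clear them here (no-op in this sketch)
        pass

    def learn(self, s, r, s_):
        # one-step TD error: delta = r + GAMMA * V(s_) - V(s)
        s, s_ = np.asarray(s), np.asarray(s_)
        td_error = r + GAMMA * self.w.dot(s_) - self.w.dot(s)
        self.w += self.lr * td_error * s  # semi-gradient TD(0) update
        return td_error

class Actor:
    # Softmax policy with linear action preferences (sketch)
    def __init__(self, n_features, n_actions, lr=0.001):
        self.theta = np.zeros((n_actions, n_features))
        self.lr = lr

    def _probs(self, s):
        prefs = self.theta.dot(s)
        e = np.exp(prefs - prefs.max())  # numerically stable softmax
        return e / e.sum()

    def choose_action(self, s):
        p = self._probs(np.asarray(s))
        return np.random.choice(len(p), p=p)

    def learn(self, s, a, td_error):
        # gradient of log pi(a|s) for linear preferences, scaled by the TD error
        s = np.asarray(s)
        p = self._probs(s)
        grad = -np.outer(p, s)
        grad[a] += s
        self.theta += self.lr * td_error * grad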
Example #2
    action = actor.choose_action(observation)

    running_reward = 0
    critic.reset()
    count = 0
    while count < Tmax:
        count += 1
        if RENDER: env.render()

        observation_, reward, done, info = env.step(action)  # reward = -1 in all cases

        # print(action, reward, observation_)
        running_reward += reward

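        # terminal transition: do one final update, then end the episode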
        if done:
            Tmax = count  # cap subsequent episodes at this episode's length
            delta = critic.learn(observation, reward, observation_)
            actor.learn(observation, action, delta)
            print(i_episode, running_reward)
            if running_reward > DISPLAY_REWARD_THRESHOLD and i_episode > 1900:
                RENDER = True  # rendering
            break
        else:
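            # non-terminal: choose the next action first (SARSA-style, on-policy)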
            action_ = actor.choose_action(observation_)
            delta = critic.learn(observation, reward, observation_)
            actor.learn(observation, action, delta)

        observation = observation_
        action = action_
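
Both snippets begin mid-episode; the setup and outer episode loop are omitted, and env.step returning four values indicates the classic gym API. A hedged reconstruction of the outer loop for Example #2, reusing the sketch classes above (the environment name, episode count, and threshold are guesses based on hints in the snippet, not the original values):

import gym

env = gym.make('MountainCar-v0')  # "reward = -1 in all cases" suggests MountainCar (assumption)
RENDER = False
DISPLAY_REWARD_THRESHOLD = -110   # assumed threshold
Tmax = 10000                      # assumed initial step cap

n_features = env.observation_space.shape[0]
actor = Actor(n_features, env.action_space.n)
critic = Critic(n_features)

for i_episode in range(2000):     # the "i_episode > 1900" check suggests ~2000 episodes
    observation = env.reset()
    # body of Example #2 follows, starting with:
    # action = actor.choose_action(observation)

Note the design difference between the two examples: Example #2 picks the next action before updating and carries it into the next step (SARSA-style), while Example #1 updates first and samples a fresh action at the top of each iteration.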