コード例 #1
0
ファイル: taxi.py プロジェクト: anetczuk/pybraingym
# ---- run configuration -----------------------------------------------------
render_demo = False    # not read in this section
imax = 5000            # number of training episodes to run
period_print = 100     # a progress line is printed every `period_print` episodes
eval_periods = 100     # not read in this section

print("\nStarting")

# ---- running statistics ----------------------------------------------------
total_reward = 0
# Bounded window holding at most the 2 * period_print most recent episode
# rewards; the mean of this window is what the progress line reports.
period_rewards = deque(maxlen=2 * period_print)
best_avg_reward = float('-inf')

procStartTime = time.time()    # start timestamp; not read in this section

for i in range(1, imax + 1):
    # One training iteration: play an episode, hand the final reward to the
    # learner, then let the agent learn from it.
    doEpisode(experiment)
    processLastReward(task, agent)
    agent.learn()

    reward = task.getCumulativeReward()
    period_rewards.append(reward)
    total_reward += reward

    # Mean over the current window; early in the run the window is only
    # partially filled, so the mean covers fewer episodes.
    avg_reward = np.mean(period_rewards)
    best_avg_reward = max(best_avg_reward, avg_reward)

    if i % period_print == 0:
        # "period reward" below is the windowed mean, "rate" is the mean
        # reward per episode since the start of the run.
        print(
            "Episode ended: %i/%i period reward: %f total reward: %d best avg reward: %f rate: %f"
            % (
                i,
                imax,
                avg_reward,
                total_reward,
                best_avg_reward,
                total_reward / i,
            ))
コード例 #2
0
# ---- run configuration -----------------------------------------------------
render_demo = False     # not read in this section
render_steps = False    # forwarded to doEpisode() as its second argument
imax = 7000             # number of training episodes to run
period_print = 100      # a progress line is printed every `period_print` episodes
eval_periods = 100      # not read in this section

print("\nStarting")

# Reward summed over the whole run, and over the current reporting period.
total_reward = period_reward = 0

procStartTime = time.time()    # start timestamp; not read in this section

for i in range(1, imax + 1):
    doEpisode(experiment, render_steps)

    # The cumulative reward is read before the final reward is handed to the
    # learner; this call order is kept exactly as in the original script.
    reward = task.getCumulativeReward()
    period_reward += reward
    total_reward += reward
    processLastReward(task, agent)

    agent.learn()

    if i % period_print == 0:
        epsil = explorer.epsilon
        # "period reward" is the mean reward per episode over the period just
        # finished; "rate" is the mean reward per episode since the start.
        print(
            "Episode ended: %i/%i period reward: %f total reward: %d rate: %f epsilon: %f"
            % (
                i,
                imax,
                period_reward / period_print,
                total_reward,
                total_reward / i,
                epsil,
            ))
        # Start accumulating the next reporting period from zero.
        period_reward = 0
コード例 #3
0
 def doEpisode(self, demonstrate=False):
     """Run one episode on this object's experiment.

     Delegates to the non-method ``doEpisode`` function: inside a method body
     the bare name resolves outside the class, not to this method. It is
     called with ``self.exp`` and ``demonstrate`` forwarded unchanged.

     Args:
         demonstrate: Forwarded as the second positional argument; defaults
             to False. NOTE(review): presumably toggles a rendering/demo
             mode in the helper -- confirm against the helper's definition.

     Returns:
         None; whatever the helper returns is discarded.
     """
     doEpisode(self.exp, demonstrate)