Example #1
# Assumed setup (the snippet is truncated before the Agent(...) call): the
# environment choice and the gamma/epsilon values below are placeholders.
import gym
import numpy as np

env = gym.make('CartPole-v1')  # hypothetical environment for this example
n_actions = env.action_space.n
input_dims = env.observation_space.shape[0]
agent = Agent(gamma=0.99,
              epsilon=1.0,
              batch_size=4,
              n_actions=n_actions,
              eps_end=0.01,
              input_dims=[input_dims],
              lr=0.003)
scores, avg_scores, eps_history = [], [], []
epochs = 500

for epoch in range(epochs):
    score = 0
    done = False
    state_old = env.reset()  # old Gym API (<0.26): reset() returns the observation only
    while not done:  # iterate over every timestep of the episode
        env.render()
        action = agent.choose_action(state_old)           # epsilon-greedy choice
        state_new, reward, done, info = env.step(action)  # old Gym API: 4-tuple
        score += reward

        agent.store_transition(state_old, action, reward, state_new, done)
        agent.learn()  # one gradient step on a sampled replay minibatch
        state_old = state_new

    scores.append(score)
    eps_history.append(agent.epsilon)
    avg_score = np.mean(scores[-100:])  # running average over the last 100 episodes
    avg_scores.append(avg_score)

    print("epoch: ", epoch, "score: %.2f " % score,
          "avg_score: %.2f " % avg_score, "epsilon: %.2f" % agent.epsilon)
    simple_plot(scores, avg_scores, epoch)  # plotting helper (see sketch below)
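The simple_plot helper is not defined in the snippet. A minimal sketch, assuming matplotlib and that the helper overlays per-episode scores with their running average, might look like:

import matplotlib.pyplot as plt

def simple_plot(scores, avg_scores, epoch):
    # Redraw the learning curve after each episode (hypothetical helper,
    # not part of the original example).
    plt.clf()
    plt.plot(scores, label='score')
    plt.plot(avg_scores, label='avg score (last 100)')
    plt.xlabel('episode')
    plt.ylabel('score')
    plt.title(f'Training progress up to episode {epoch}')
    plt.legend()
    plt.pause(0.001)  # brief pause so the figure updates without blocking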
Example #2
# Assumed setup (the snippet is truncated before the Agent(...) call): the
# hyperparameters match LunarLander-v2 (8-dimensional observation, 4 discrete
# actions), so that environment and the gamma value are assumed here.
import gym
import numpy as np

if __name__ == '__main__':
    env = gym.make('LunarLander-v2')
    agent = Agent(gamma=0.99,
                  epsilon=1.0,
                  batch_size=64,
                  n_actions=4,
                  eps_end=0.01,
                  input_dims=[8],
                  lr=0.001)
    scores, eps_history = [], []
    n_games = 500

    for i in range(n_games):
        score = 0
        done = False
        observation = env.reset()  # old Gym API: reset() returns the observation only

        while not done:
            action = agent.choose_action(observation)
            observation_, reward, done, info = env.step(action)  # old Gym API: 4-tuple
            score += reward
            agent.store_transition(observation, action, reward, observation_,
                                   done)
            agent.learn()
            observation = observation_
            # env.render()  # rendering disabled to speed up training

        scores.append(score)
        eps_history.append(agent.epsilon)
        avg_score = np.mean(scores[-100:])  # running average over the last 100 episodes

        print('episode ', i, 'score %.2f' % score,
              'average score %.2f' % avg_score, 'epsilon %.2f' % agent.epsilon)
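Neither example shows the Agent class itself; both training loops only assume it exposes choose_action, store_transition, learn, and an epsilon attribute that decays toward eps_end. A minimal sketch of such an epsilon-greedy DQN agent with a replay buffer, assuming PyTorch and placeholder values for the memory size and epsilon decay rate, might look like:

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

class Agent:
    # Hypothetical implementation; max_mem and eps_dec are assumed defaults.
    def __init__(self, gamma, epsilon, batch_size, n_actions, eps_end,
                 input_dims, lr, max_mem=100_000, eps_dec=5e-4):
        self.gamma, self.epsilon = gamma, epsilon
        self.eps_end, self.eps_dec = eps_end, eps_dec
        self.batch_size, self.n_actions = batch_size, n_actions
        self.mem_size, self.mem_cntr = max_mem, 0
        # Replay buffer stored as flat numpy arrays
        self.state_mem = np.zeros((max_mem, *input_dims), dtype=np.float32)
        self.new_state_mem = np.zeros((max_mem, *input_dims), dtype=np.float32)
        self.action_mem = np.zeros(max_mem, dtype=np.int64)
        self.reward_mem = np.zeros(max_mem, dtype=np.float32)
        self.done_mem = np.zeros(max_mem, dtype=bool)
        # Two-layer MLP mapping a state to one Q-value per action
        self.q_net = nn.Sequential(
            nn.Linear(*input_dims, 256), nn.ReLU(),
            nn.Linear(256, 256), nn.ReLU(),
            nn.Linear(256, n_actions))
        self.optimizer = optim.Adam(self.q_net.parameters(), lr=lr)
        self.loss_fn = nn.MSELoss()

    def store_transition(self, state, action, reward, state_, done):
        i = self.mem_cntr % self.mem_size  # overwrite the oldest entries
        self.state_mem[i] = state
        self.new_state_mem[i] = state_
        self.action_mem[i] = action
        self.reward_mem[i] = reward
        self.done_mem[i] = done
        self.mem_cntr += 1

    def choose_action(self, observation):
        if np.random.random() > self.epsilon:  # exploit: greedy w.r.t. Q
            state = torch.tensor(np.array([observation]), dtype=torch.float32)
            with torch.no_grad():
                return int(self.q_net(state).argmax(dim=1).item())
        return np.random.randint(self.n_actions)  # explore: random action

    def learn(self):
        if self.mem_cntr < self.batch_size:
            return  # not enough transitions stored yet
        max_mem = min(self.mem_cntr, self.mem_size)
        batch = np.random.choice(max_mem, self.batch_size, replace=False)
        states = torch.tensor(self.state_mem[batch])
        states_ = torch.tensor(self.new_state_mem[batch])
        rewards = torch.tensor(self.reward_mem[batch])
        dones = torch.tensor(self.done_mem[batch])
        actions = torch.tensor(self.action_mem[batch])

        # Q(s, a) for the actions actually taken
        q_pred = self.q_net(states).gather(1, actions.unsqueeze(1)).squeeze(1)
        # Bootstrapped target: r + gamma * max_a' Q(s', a'), zero at terminals
        with torch.no_grad():
            q_next = self.q_net(states_).max(dim=1).values
            q_next[dones] = 0.0
        q_target = rewards + self.gamma * q_next

        self.optimizer.zero_grad()
        loss = self.loss_fn(q_pred, q_target)
        loss.backward()
        self.optimizer.step()
        # Linearly decay epsilon toward its floor
        self.epsilon = max(self.eps_end, self.epsilon - self.eps_dec)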