Code example #1
import sys
import datetime

import numpy as np
import torch

# Environment, A2C_Agent, use_model and the uppercase constants (N_STEPS, N_EPOCHS,
# VISUALIZE, TRAINING, SAVE_MODEL) are defined elsewhere in the project.


def main():

    # Path to the ROM (hardcoded here rather than read from argv)
    filename = './Super_Mario_Land_World.gb'

    env = Environment(filename, max_steps=N_STEPS, visualize=VISUALIZE)
    env.start()
    agent = A2C_Agent(discount=0.99, epsilon=0.9, learning_rate=1e-3)

    agent_is_setup = False

    # Running sum of the policy entropy over all steps, passed to the agent's training step
    entropy_term = 0
    all_rewards = []
    all_lengths = []
    average_lengths = []

    for episode in range(N_EPOCHS):
        print("\n ", "=" * 50)
        print("Epoch {}/{}".format(episode + 1, N_EPOCHS))
        env.reset()
        state = env.obs()

        log_probs = []
        values = []
        rewards = []

        if not agent_is_setup:
            agent.setup(env.observation_space, env.action_space, use_model)
            agent_is_setup = True

        for steps in range(N_STEPS):
            # Get action from agent
            with torch.no_grad():
                action, log_prob, entropy, value = agent.get_action(state, TRAINING)

            value = value.detach().numpy()[0, 0]

            new_state, reward, done = env.step(action, steps)

            rewards.append(reward)
            values.append(value)
            log_probs.append(log_prob)
            entropy_term += entropy

            # Set obs to the new state
            state = new_state

            if done or steps == N_STEPS - 1:
                # Bootstrap the return with the critic's estimate of the last state's value
                Qval, _ = agent.model.forward(torch.Tensor(new_state))
                Qval = Qval.detach().numpy()[0, 0]
                all_rewards.append(np.sum(rewards))
                all_lengths.append(steps)
                average_lengths.append(np.mean(all_lengths[-10:]))
                if episode % 10 == 0:
                    sys.stdout.write(
                        "episode: {}, reward: {}, total length: {}, average length: {} \n".format(
                            episode, np.sum(rewards), steps, average_lengths[-1]))
                break

        print("Loss :", agent.train(values, rewards, log_probs, Qval, entropy_term))

    if SAVE_MODEL and TRAINING:
        date = datetime.datetime.now()
        model_name = str(date.day) + '_' + str(date.month) + '_' + str(date.hour) + '_' + agent.name + '.h5'
        agent.save_model(model_name)

    env.stop()
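
The A2C_Agent.train call above receives the collected values, rewards, log-probabilities, the bootstrapped Qval and the accumulated entropy term, but its body is not part of this listing. The sketch below is only a rough, hypothetical illustration of what such an update usually looks like (a2c_update, optimizer and entropy_coef are illustrative names, not the project's API); note that the loop above detaches values and collects log_probs under torch.no_grad(), so the real A2C_Agent.train presumably recomputes or stores these with their computation graph intact.

import torch


def a2c_update(values, rewards, log_probs, Qval, entropy_term,
               optimizer, discount=0.99, entropy_coef=0.001):
    # Discounted returns, bootstrapped from the critic's estimate of the last state (Qval)
    returns = []
    R = Qval
    for r in reversed(rewards):
        R = r + discount * R
        returns.insert(0, R)
    returns = torch.tensor(returns, dtype=torch.float32)

    values = torch.stack(values).squeeze()   # critic predictions; must still carry gradients
    log_probs = torch.stack(log_probs)

    # Advantage-weighted policy-gradient loss, value-regression loss, and an entropy bonus
    advantage = returns - values
    actor_loss = (-log_probs * advantage.detach()).mean()
    critic_loss = 0.5 * advantage.pow(2).mean()
    loss = actor_loss + critic_loss - entropy_coef * entropy_term

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()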
Code example #2
File: main.py  Project: AntoninDuval/Mario_RL
import math
import datetime

import torch

# Environment, DQN_Agent, use_model and the uppercase constants (N_STEPS, N_EPOCHS,
# VISUALIZE, TRAINING, SAVE_MODEL) are defined elsewhere in the project.


def main():
    # Path to the ROM (hardcoded here rather than read from argv)
    filename = './Super_Mario_Land_World.gb'
    env = Environment(filename, max_steps=N_STEPS, visualize=VISUALIZE)
    env.start()
    agent = DQN_Agent(discount=0.9, epsilon=0.9, learning_rate=1e-5)
    avg_loss = None
    agent_is_setup = False
    min_epsilon = 0.001
    max_epsilon = 0.001

    for episode in range(N_EPOCHS):
        print("\n ", "=" * 50)
        env.reset()
        state = torch.Tensor(env.obs())
        old_state = state
        old_old_state = state
        is_a_released = torch.ones(1)
        # State: the last three 16x20 observations stacked, plus the A-button-released
        # flag and Mario's size
        states = [
            torch.cat((state, old_state, old_old_state), 0).view(3, 16, 20),
            is_a_released, env.mario_size
        ]
        episode_reward = 0

        if not agent_is_setup:
            agent.setup(env.observation_space, env.action_space, use_model)
            agent_is_setup = True

        for steps in range(N_STEPS):
            # Get action from agent
            actions = agent.get_action(states, TRAINING)

            new_state, reward, done = env.step(actions)

            #env.print_obs(new_state.numpy().astype(int))

            if actions[1] == 0:
                is_a_released = torch.zeros(1)
            else:
                is_a_released = torch.ones(1)

            if steps + 1 == N_STEPS:
                done = True

            episode_reward += reward

            # Shift the frame stack: the new observation plus the two most recent previous frames
            new_states = [
                torch.cat((new_state, states[0][0, :, :], states[0][1, :, :]),
                          0).view(3, 16, 20), is_a_released, env.mario_size
            ]

            # Store the transition in the replay buffer
            agent.update_replay_memory(states, actions, reward, new_states,
                                       done)

            # Train the neural network
            if TRAINING:
                loss = agent.train(done)
                if avg_loss is None:
                    avg_loss = loss
                else:
                    avg_loss = 0.99 * avg_loss + 0.01 * loss
            else:
                avg_loss = 0

            states = new_states

            if (steps + 1) % 20 == 0:
                print("\rAverage loss : {:.5f} --".format(avg_loss),
                      "Episode rewards: {} --".format(episode_reward),
                      "epochs {}/{} --".format(episode, N_EPOCHS),
                      "steps {}/{}".format(steps + 1, N_STEPS),
                      end="")
            if done:
                print("\n", env.level_progress_max)
                break

        # Epsilon schedule: logarithmic decay clipped to [min_epsilon, max_epsilon]
        # (with both bounds set to 0.001 above, epsilon stays fixed at 0.001)
        agent.epsilon = max(
            min_epsilon, min(max_epsilon, 1.0 - math.log10((episode + 1) / 5)))
    if SAVE_MODEL and TRAINING:
        date = datetime.datetime.now()
        model_name = str(date.day) + '_' + str(date.month) + '_' + str(
            date.hour) + '_' + agent.name + '.h5'
        agent.save_model(model_name)

    env.stop()
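
DQN_Agent.train and the replay memory it samples are likewise defined outside this listing. The sketch below is a hedged, generic illustration of a single DQN update over a batch of stored transitions (dqn_train_step, target_model and optimizer are illustrative names, not the project's API). It assumes plain tensor states and a single discrete action index per transition, whereas the loop above stores composite states (frame stack, A-button flag, Mario size) and a multi-button action vector, so the project's implementation necessarily differs in those details.

import random

import torch
import torch.nn.functional as F


def dqn_train_step(model, target_model, optimizer, replay_memory,
                   batch_size=32, discount=0.9):
    # Wait until enough transitions have been stored to form a batch
    if len(replay_memory) < batch_size:
        return 0.0

    # Sample a random minibatch of (state, action, reward, next_state, done) tuples
    batch = random.sample(replay_memory, batch_size)
    states, actions, rewards, next_states, dones = zip(*batch)

    states = torch.stack(states)
    next_states = torch.stack(next_states)
    actions = torch.tensor(actions, dtype=torch.int64).unsqueeze(1)
    rewards = torch.tensor(rewards, dtype=torch.float32)
    dones = torch.tensor(dones, dtype=torch.float32)

    # Q(s, a) for the actions that were actually taken
    q_values = model(states).gather(1, actions).squeeze(1)

    # TD target: r + discount * max_a' Q_target(s', a'), with no bootstrap on terminal steps
    with torch.no_grad():
        next_q = target_model(next_states).max(dim=1).values
        targets = rewards + discount * next_q * (1.0 - dones)

    loss = F.mse_loss(q_values, targets)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()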