Example #1
    def train(self, total_games=500):
        # `game` and `pygame` come from the enclosing module scope
        # hyperparameters: fixed_epsilon=None presumably lets the agent decay
        # epsilon over total_games instead of holding it constant
        fixed_epsilon = None
        alpha = 0.5      # learning rate
        gamma = 0.9      # discount factor
        epsilon = 1.0    # initial exploration rate: fully random at the start

        agent = QLearningAgent(epsilon=epsilon, fixed_epsilon=fixed_epsilon,
                               alpha=alpha, gamma=gamma, total_games=total_games)
        for game_number in range(total_games):
            last_action = None

            while True:
                # game_results() returns None while the game is still in progress
                game_result = game.game_results()
                if game_result is not None:
                    pygame.display.flip()
                    game.reset()
                    game.initialize()
                    break

                last_action = game.play(agent)
                pygame.display.flip()

        agent.save_policy()
        return agent
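
For context, the update a QLearningAgent like the one above typically applies is the tabular Q-learning rule with an epsilon-greedy policy. Below is a minimal sketch of that rule; MinimalQAgent and its method names are illustrative, not the class used in the example:

import random
from collections import defaultdict

class MinimalQAgent:
    def __init__(self, alpha=0.5, gamma=0.9, epsilon=1.0):
        self.alpha, self.gamma, self.epsilon = alpha, gamma, epsilon
        self.q = defaultdict(float)  # maps (state, action) -> estimated return

    def choose(self, state, actions):
        # epsilon-greedy: explore with probability epsilon, otherwise exploit
        if random.random() < self.epsilon:
            return random.choice(actions)
        return max(actions, key=lambda a: self.q[(state, a)])

    def update(self, s, a, r, s_, next_actions):
        # Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
        best_next = max((self.q[(s_, a_)] for a_ in next_actions), default=0.0)
        self.q[(s, a)] += self.alpha * (r + self.gamma * best_next - self.q[(s, a)])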
Example #2
import sys
import random

from tqdm import tqdm

from environment import TicTacToe
from agent import QLearningAgent, Hoooman
import config as cfg
from config import display_board


def q(text=''):
    # small debugging helper: print a marked value, then exit
    print(f'>{text}<')
    sys.exit()


# initializing the TicTacToe environment and a QLearningAgent
# (the master Tic-Tac-Toe player, your opponent!)
env = TicTacToe()
player1 = QLearningAgent(name=cfg.playerX_QLearningAgent_name)
player1.loadQtable()  # load the learned Q-table
player1.epsilon = 0.0  # greedy actions only, zero exploration

# initializing the agent class that lets you, the human user, take the actions in the game
player2 = Hoooman()

# replay decides whether to rematch at the end of a game
replay = True
while replay:

    done = False  # the episode goes on as long as done is False

    # deciding which player makes a move first
    playerID = random.choice([True, False])  # True means player1
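    # The example is cut off above; a plausible continuation of the replay
    # loop is sketched below. getAction(), step(), and the rematch prompt
    # are hypothetical stand-ins, not this repository's confirmed API.
    while not done:
        current = player1 if playerID else player2  # whose turn it is
        action = current.getAction(env)             # hypothetical agent call
        state, reward, done = env.step(action)      # hypothetical env call
        playerID = not playerID                     # alternate turns

    # ask for a rematch once the episode ends
    replay = input('Play again? [y/n]: ').strip().lower() == 'y'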
Example #3
if __name__ == '__main__':
    episode = 30  # number of training episodes

    # probability of exploiting learned experience: at 0.9, the agent would pick
    # an action from its learned values 90% of the time and a random action 10% of the time
    epsilon = 0.8
    # per the update formula: the larger this value, the less of the old estimate is retained
    learning_rate = 0.01
    discount_factor = 0.9  # weight given to future rewards

    # assuming Game is exported by the same module as startGame
    from maze_game.game import startGame, Game
    env = startGame()

    key = input('Do you want to see the training process? [y] ')
    key = key == 'y' or key == ''  # empty input defaults to yes

    agent = QLearningAgent(
        epsilon=epsilon,
        learning_rate=learning_rate,
        discount_factor=discount_factor,
        actions=Game.DIRECTION.ACTIONS
    )
    successful_step_counter_arr = []
    failed_step_counter_arr = []

    if key:
        env.display()

    for eps in range(1, episode + 1):

        cur_state = env.reset()
        step_counter = 0

        while True:
            step_counter += 1
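            # The snippet ends mid-loop; a plausible continuation is sketched
            # below. choose_action(), step(), and learn() are assumptions of
            # this sketch, not the project's confirmed API.
            action = agent.choose_action(cur_state)
            next_state, reward, done = env.step(action)
            agent.learn(cur_state, action, reward, next_state)
            cur_state = next_state

            if key:
                env.display()

            if done:
                # record the episode length by outcome
                arr = successful_step_counter_arr if reward > 0 else failed_step_counter_arr
                arr.append(step_counter)
                break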
Example #4
import numpy as np
import pandas as pd
from env import TicTacToeEnv
from agent import QLearningAgent

env = TicTacToeEnv()
agent = QLearningAgent(env)

# train for one million games, logging progress every 10,000
for game_nr in range(1000000):
    if game_nr % 10000 == 0:
        print(game_nr)
    done = False
    s = env.reset().copy()  # copy so the stored state is not mutated in place
    while not done:
        a = agent.take_action(s)
        r, s_, done, _ = env.step(a)  # this env returns (reward, next_state, done, info)
        agent.learn(s, a, r, s_, done)
        s = s_.copy()

# export the learned state values (_V) and visit counts (_N) for analysis
V = pd.DataFrame.from_dict(agent._V,
                           orient='index',
                           dtype=np.float32,
                           columns=['V'])
N = pd.DataFrame.from_dict(agent._N,
                           orient='index',
                           dtype=np.uint32,
                           columns=['N'])
df = V.merge(N, how='left', left_index=True, right_index=True)
# expand each state key (presumably a tuple of board cells) into one column per cell
states = pd.DataFrame(df.index.values.tolist(), index=df.index)
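
To make the exported _V/_N tables concrete: a tabular agent of this kind usually keeps a state-value estimate and a visit count per state. The sketch below shows one way learn() could implement that with a 1/N step size; it is an illustration under those assumptions, not the repository's actual agent:

class SketchValueAgent:
    def __init__(self, env, gamma=1.0):
        self.env = env
        self.gamma = gamma
        self._V = {}  # state (hashable tuple) -> value estimate
        self._N = {}  # state (hashable tuple) -> visit count

    def learn(self, s, a, r, s_, done):
        s, s_ = tuple(s), tuple(s_)
        self._N[s] = self._N.get(s, 0) + 1
        alpha = 1.0 / self._N[s]                      # step size shrinks with visits
        v_next = 0.0 if done else self._V.get(s_, 0.0)
        td_target = r + self.gamma * v_next           # TD(0) target
        self._V[s] = self._V.get(s, 0.0) + alpha * (td_target - self._V.get(s, 0.0))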