Esempio n. 1
0
        cur_state = env.reset()
        step_counter = 0

        while True:
            step_counter += 1

            if key:
                env.display()

            action = agent.choose_action(cur_state)

            next_state, reward = env.move(action)

            agent.learn(
                cur_state=cur_state,
                action=action,
                reward=reward,
                next_state=next_state
            )

            cur_state = next_state

            if reward != 0:
                break

        if reward > 0:
            successful_step_counter_arr.append(step_counter)
        elif reward < 0:
            failed_step_counter_arr.append(step_counter)

        if key:
            print(
Esempio n. 2
0
from env import TicTacToeEnv
from agent import QLearningAgent

env = TicTacToeEnv()
agent = QLearningAgent(env)

for game_nr in range(1000000):
    if game_nr % 10000 == 0:
        print(game_nr)
    done = False
    s = env.reset().copy()
    # print('Init', s)
    while not done:
        a = agent.take_action(s)
        r, s_, done, _ = env.step(a)
        agent.learn(s, a, r, s_, done)
        # print(s, a, r, s_, done)
        s = s_.copy()

V = pd.DataFrame.from_dict(agent._V,
                           orient='index',
                           dtype=np.float32,
                           columns=['V'])
N = pd.DataFrame.from_dict(agent._N,
                           orient='index',
                           dtype=np.uint32,
                           columns=['N'])
df = V.merge(N, how='left', left_index=True, right_index=True)
states = pd.DataFrame(df.index.values.tolist(), index=df.index)
res = states.merge(V, how='left', left_index=True, right_index=True).merge(
    N, how='left', left_index=True, right_index=True).reset_index(drop=True)