コード例 #1
0
ファイル: main.py プロジェクト: mMcaniff/MyGo
def play_matches(player1, player2, games):
    """Play ``games`` matches between two agents and collect the visited states.

    Each game alternates between the two agents: the agent to move calls
    ``act()``, the resulting state is stored in the memory of the side that
    moved, and the other agent is informed through ``oppAct()``. Once the
    state reports that the game is over, both per-game memories are marked
    via ``declare_win_or_loss(1)`` / ``declare_win_or_loss(-1)`` and merged
    into a single match-wide ``Memory`` with ``join_memories``.

    Args:
        player1: Agent exposing ``act()`` (returning a
            ``(new_state, value, preds)`` triple) and ``oppAct(state)``.
        player2: Agent with the same interface as ``player1``.
        games: Number of games to play.

    Returns:
        None. NOTE(review): the accumulated ``main_memory`` is never returned
        or stored anywhere visible here -- confirm whether callers need it.
    """
    main_memory = Memory()

    for game_index in range(games):
        # The board object itself is not used in this function.
        # NOTE(review): kept in case construction matters elsewhere; confirm
        # whether it can be dropped.
        board.GoBoard()

        # Draw -1 or +1. The drawn sign is both the side that moves first and
        # the key under which player2 is registered, exactly as in the
        # original two-branch version.
        # NOTE(review): this means player2 always makes the first move; only
        # the sign attached to each player is random -- confirm intent.
        first_side = random.randint(0, 1) * 2 - 1
        players = {
            first_side: {"agent": player2, "name": "Player 2"},
            -first_side: {"agent": player1, "name": "Player 1"},
        }
        player_turn = first_side

        # One memory per side for the WHOLE game. These used to be re-created
        # on every turn, which threw away every move except the last one
        # before the result was declared and merged.
        # NOTE(review): memory_1 holds the moves of side -1, which is player1
        # or player2 depending on the draw above, while the win/loss labelling
        # below keys on the string "Player 1". Verify against what
        # get_winner() actually returns.
        memory_1 = Memory()
        memory_2 = Memory()

        while True:
            # Let the side to move pick its action.
            new_state, value, _preds = players[player_turn]["agent"].act()
            print(player_turn)
            print(players[player_turn])
            print(f"Value: {value}")
            print(new_state.state)

            side_memory = memory_1 if player_turn == -1 else memory_2
            side_memory.add_to_memory(new_state.state)

            # Hand the move over to the other side and update its view.
            player_turn = -player_turn
            players[player_turn]["agent"].oppAct(new_state)

            if not new_state.state.is_game_over():
                continue

            print(f"Game {game_index} is over!")
            winner = new_state.state.get_winner()
            print(f"{winner} Won!")

            if winner == "Player 1":
                memory_1.declare_win_or_loss(1)
                memory_2.declare_win_or_loss(-1)
            else:
                memory_2.declare_win_or_loss(1)
                memory_1.declare_win_or_loss(-1)

            main_memory.join_memories(memory_1)
            main_memory.join_memories(memory_2)
            break
コード例 #2
0
# Build the CartPole environment with a fixed seed.
env = gym.make("CartPole-v0")
env.seed(1)
n_actions = env.action_space.n

# Model, per-episode buffer and optimizer used by the training loop below.
cartpole_model = create_cartpole_model()
memory = Memory()
learning_rate = 1e-3
optimizer = tf.train.AdamOptimizer(learning_rate)

# Reward tracking and periodic plotting of the tracked value.
smoothed_reward = util.LossHistory(smoothing_factor=0.9)
plotter = util.PeriodicPlotter(sec=5, xlabel='Iterations', ylabel='Rewards')

for i_episode in range(10000):
    plotter.plot(smoothed_reward.get())

    # Start a new episode and step until the environment signals completion.
    observation = env.reset()
    done = False
    while not done:
        action = choose_action(cartpole_model, observation)
        next_obs, reward, done, info = env.step(action)
        memory.add_to_memory(observation, action, reward)

        if not done:
            # Episode continues: advance to the observation just received.
            observation = next_obs
            continue

        # Episode finished: log its total reward, then run one training step
        # on everything recorded during the episode.
        total_reward = sum(memory.rewards)
        smoothed_reward.append(total_reward)

        episode_observations = np.vstack(memory.observations)
        episode_actions = np.array(memory.actions)
        episode_returns = discount_rewards(memory.rewards)
        train_step(cartpole_model,
                   optimizer,
                   observations=episode_observations,
                   actions=episode_actions,
                   discounted_rewards=episode_returns)

        # Empty the buffer so the next episode starts from scratch.
        memory.clear()

save_video_of_model(cartpole_model, "CartPole-v0")