Example #1
0
    # NOTE(review): fragment — agent_1, othello, game, and the pygame/display
    # setup are defined outside this excerpt.
    # Restore the second A2C agent's weights from a saved checkpoint.
    agent_2 = A2C_agent(input_dim=3, lam=0.8, gamma=0.99, lr=1e-4)
    param = torch.load('a2c_2_param2.pt')
    agent_2.model.load_state_dict(param)
    # NOTE(review): agent_2 is loaded but never used in this visible span;
    # its use is presumably outside the excerpt — verify.

    # Event/replay loop: keep the window responsive and replay games forever.
    while (1):
        for event in pygame.event.get():
            # Exit when the window's close button is pressed.
            if event.type == QUIT:
                pygame.display.quit()
                # Quit pygame (the window is closed).
                pygame.quit()
                sys.exit()

        # Board cell encoding: black = 1, white = 2.
        board, changeable_Pos, Position_Row, Position_Col, Change_Position, done = othello.make(
        )
        game.updateBoard(board)
        # 100 iterations presumably bounds one game's length; the env's
        # `done` flag is the real terminator — TODO confirm the bound.
        for _ in range(100):
            #setrow, setcol = agent_1.take_action(board, changeable_Pos, Position_Row, Position_Col, Change_Position)
            # Greedy (deterministic) action instead of the sampled one above.
            setrow, setcol = agent_1.take_determ_action(
                board, changeable_Pos, Position_Row, Position_Col,
                Change_Position)

            board, changeable_Pos, Position_Row, Position_Col, Change_Position, done = othello.step(
                setrow, setcol)

            game.updateBoard(board)
            if done:
                # Brief pause so the final position stays visible.
                time.sleep(0.5)
                break
Example #2
0
def get_play_data(agent_1, agent_2):
    """Run one self-play game of Othello and collect both players' trajectories.

    ``agent_1`` moves when ``othello.color == 1`` (black) and ``agent_2``
    otherwise (white).  Each turn records, newest-first, the pre-move state,
    the player's current reward, and the chosen action encoded as
    ``8 * row + col``.  After the game the terminal state/reward pair is
    prepended to both trajectories, and every step's "value" target is
    +1 / -1 / 0 depending on who won (``discount_rate`` is 1, so no actual
    discounting takes place).

    Args:
        agent_1: first (black) player; must expose ``take_action(board,
            changeable_Pos, Position_Row, Position_Col, Change_Position)``.
        agent_2: second (white) player with the same interface.

    Returns:
        Two dicts, one per player, each holding FloatTensors under the keys
        ``'states'``, ``'rewards'``, ``'actions'`` and ``'values'``.
    """
    env = Othello()

    # Trajectory buffers; appendleft() keeps the newest step at index 0.
    states_1, rewards_1, actions_1 = deque(), deque(), deque()
    states_2, rewards_2, actions_2 = deque(), deque(), deque()

    board, movable, rows, cols, flips, done = env.make()

    while not done:
        # Both players observe through the same state/reward encoding.
        state = getState(board, movable, rows, cols, flips)
        r1, r2 = getReward(board)

        if env.color == 1:
            # Black to move: agent_1 acts.
            states_1.appendleft(state)
            rewards_1.appendleft(r1)
            row, col = agent_1.take_action(board, movable, rows, cols, flips)
            board, movable, rows, cols, flips, done = env.step(row, col)
            actions_1.appendleft(8 * row + col)
        else:
            # White to move: agent_2 acts.
            states_2.appendleft(state)
            rewards_2.appendleft(r2)
            row, col = agent_2.take_action(board, movable, rows, cols, flips)
            board, movable, rows, cols, flips, done = env.step(row, col)
            actions_2.appendleft(8 * row + col)

    # Prepend the terminal observation and rewards to both trajectories.
    state = getState(board, movable, rows, cols, flips)
    r1, r2 = getReward(board)
    states_1.appendleft(state)
    states_2.appendleft(state)
    rewards_1.appendleft(r1)
    rewards_2.appendleft(r2)

    states_t1 = torch.FloatTensor(list(states_1))
    states_t2 = torch.FloatTensor(list(states_2))
    actions_t1 = torch.FloatTensor(list(actions_1))
    actions_t2 = torch.FloatTensor(list(actions_2))
    # Drop the oldest (pre-first-move) reward from each trajectory.
    rewards_t1 = torch.FloatTensor(list(rewards_1)[:-1])
    rewards_t2 = torch.FloatTensor(list(rewards_2)[:-1])

    # Per-player outcome sign: +1 winner, -1 loser, 0 on a draw.  With
    # discount_rate == 1 every step receives the same target value.
    discount_rate = 1
    if r1 > r2:
        sign_1, sign_2 = 1, -1
    elif r1 < r2:
        sign_1, sign_2 = -1, 1
    else:
        sign_1 = sign_2 = 0
    values_t1 = torch.FloatTensor(
        [sign_1 * discount_rate ** i for i in range(len(rewards_t1))])
    values_t2 = torch.FloatTensor(
        [sign_2 * discount_rate ** i for i in range(len(rewards_t2))])

    data_first = {
        'states': states_t1,
        'rewards': rewards_t1,
        'actions': actions_t1,
        'values': values_t1,
    }
    data_second = {
        'states': states_t2,
        'rewards': rewards_t2,
        'actions': actions_t2,
        'values': values_t2,
    }
    return data_first, data_second