Ejemplo n.º 1
0
def rollout(policy, state, greedy=False, show=False):
    state = bt.Breakthrough(state)
    while not state.terminated:
        # Pick the next action, either greedily or weighted by the policy
        index = -1
        if greedy:
            index = get_best_legal(state, policy)
        else:
            index = policy.get_action_index(state)
        str_move = convert_index_to_move(index, state.player)
        if show: print('Playing %s' % str_move)
        state.apply(lg.decode_move(str_move))
        if show: print(state)
    return state.reward
Ejemplo n.º 2
0
def predict():
    # Load the trained policy
    policy = CNPolicy(checkpoint=PRIMARY_CHECKPOINT)

    # Advance the game to the desired state
    history = input('Input game history: ')
    state = bt.Breakthrough()
    for part in history.split(' '):
        if len(part) == 5:
            state = bt.Breakthrough(state, lg.decode_move(part))
    print(state)

    desired_reward = 1 if state.player == 0 else -1

    # Predict the next move
    prediction = policy.get_action_probs(state)
    sorted_indices = np.argsort(prediction)[::-1][0:5]
    for index in sorted_indices:
        trial_state = bt.Breakthrough(
            state, bt.convert_index_to_move(index, state.player))
        greedy_win = rollout(policy, trial_state,
                             greedy=True) == desired_reward
        win_rate = evaluate_for(trial_state, policy, state.player)
        state_value = policy.get_state_value(trial_state)
        log("Play %s with probability %f (%s) for win rate %d%% and state-value %d%%"
            % (convert_index_to_move(index, state.player), prediction[index],
               '*' if greedy_win else '!', int(win_rate * 100),
               int(state_value * 50) + 50))  # Scale from [-1,+1] to [0,100]

    #log('MCTS evaluation...')
    #tree = mcts.MCTSTrainer(policy)
    #tree.prepare_for_eval(state)
    #tree.iterate(state=state, num_iterations=50000)

    _ = input('Press enter to play on')
    rollout(policy, state, greedy=True, show=True)
Ejemplo n.º 3
0
def evaluate_for(initial_state, policy, player, num_rollouts=1000):
    states = [bt.Breakthrough(initial_state) for _ in range(num_rollouts)]

    move_made = True
    while move_made:
        move_made = False
        for (state, action) in zip(states, policy.get_action_indicies(states)):
            if not state.terminated:
                state.apply(bt.convert_index_to_move(action, state.player))
                move_made = True

    wins = 0
    for state in states:
        if state.is_win_for(player): wins += 1
    return wins / num_rollouts
Ejemplo n.º 4
0
def compare_policies_in_parallel(our_policy, their_policy, num_matches=100):
    states = [bt.Breakthrough() for _ in range(num_matches)]

    # We start all the even numbered games, they start all the odd ones.  Advance all the odd numbered games by a turn
    # so that it's our turn in every game.
    for state in states[1::2]:
        index = their_policy.get_action_index(state)
        state.apply(bt.convert_index_to_move(index, state.player))

    # Rollout all the games to completion.
    current_policy = our_policy
    other_policy = their_policy

    move_made = True
    while move_made:
        # Compute the next move for each game in parallel
        move_made = False
        for (state, action) in zip(states,
                                   current_policy.get_action_indicies(states)):
            if not state.terminated:
                state.apply(bt.convert_index_to_move(action, state.player))
                move_made = True

        # Now it's the other player's turn, so swap policies.
        current_policy, other_policy = other_policy, current_policy

    wins = 0
    p0_wins = 0
    p1_wins = 0
    for state in states[0::2]:
        if state.is_win_for(0):
            wins += 1
            p0_wins += 1
    for state in states[1::2]:
        if state.is_win_for(1):
            wins += 1
            p1_wins += 1

    log('Wins, Wins as p0, Wins as p1 = %d, %d, %d' % (wins, p0_wins, p1_wins))
    return wins / num_matches
Ejemplo n.º 5
0
    def do_POST(self):
        global state
        global role
        global policy

        print('POST to %s' % self.path)
        length = self.headers.get('content-length')
        request = self.rfile.read(int(length)).decode('ascii')
        print('Request was %s' % request)

        response = ''
        if request.startswith('( INFO )'):
            response = '( (name Mimic) (status available) )'
        elif request.startswith('( START '):
            state = bt.Breakthrough()
            if request.split(sep=' ')[3] == 'white':
                print('We are white')
                role = 0
            else:
                print('We are black')
                role = 1
            response = 'ready'
        elif request.startswith('( PLAY '):
            parsed_play_req = re.match(r'\( PLAY [^ ]* (.*)', request)
            # Parse out the last move (remembering it's NIL at the start of the game)
            move = parsed_play_req.group(1)
            move = move.replace('noop', '').replace('move', '').replace(
                '(', '').replace(')', '').replace(' ', '')
            if move != 'NIL':
                print('Raw move was %s' % move)
                move = list(move)
                src_col = 8 - int(move[0])
                src_row = int(move[1]) - 1
                dst_col = 8 - int(move[2])
                dst_row = int(move[3]) - 1
                move = (src_row, src_col, dst_row, dst_col)
                print('Move was %s' % lg.encode_move(move))
                state.apply(move)
            else:
                print('First move')

            if state.player == role:
                prediction = policy.get_action_probs(state)
                index = np.argsort(prediction)[::-1][0]
                move = bt.convert_index_to_move(index, state.player)
                print('Playing %s' % (lg.encode_move(move)))
                src_row = move[0] + 1
                src_col = 8 - move[1]
                dst_row = move[2] + 1
                dst_col = 8 - move[3]
                response = '( move %d %d %d %d )' % (src_col, src_row, dst_col,
                                                     dst_row)
            else:
                print('Not our turn - no-op')
                response = 'noop'

        print('Responding with %s' % response)
        response_bytes = response.encode('ascii')
        self.send_response(200)
        self.send_header('Content-Length', len(response_bytes))
        self.end_headers()
        self.wfile.write(response_bytes)
        self.wfile.flush()
Ejemplo n.º 6
0
                action="store_true")

args = vars(ap.parse_args())
print(args)
board_size = args['board']
time = args['time']
num_games = args['games']
output = args['debug']
use_eval = args['eval']
ab_agent_param = {'iterdepth': args['iterdepth'], 'eval': args['eval']}
mc_agent_param = {
    'simulations': args['simulations'],
    'advanced': args['advanced']
}

game = breakthrough.Breakthrough(board_size, board_size)
print(game)
agents = [
    ab_agent.ABAgent(ab_agent_param),
    mcts_agent.MCTSAgent(mc_agent_param)
]

ScoreColor = {'White': 0, 'Black': 0}
ScoreAgent = {agents[0].name(): 0, agents[1].name(): 0}

start_time = timer()
for n in range(num_games):
    play_a_game(game, agents, output)
    play_a_game(game, agents[::-1], output)
    print(ScoreAgent)
print("Time:", timer() - start_time)