def _move_selected(state, button, move):
    """Handle a cell selected by the human, then let the agent answer.

    Args:
        state: Game state object; its ``board_state`` and ``agent_state``
            attributes are updated in place.
        button: The selected widget; a label of ``X`` or ``O`` means the
            cell is already occupied.
        move: The position chosen by the human.

    Returns:
        Whatever ``interface.set_invalid_move_state`` returns when the cell
        is already occupied, otherwise ``None``.
    """
    # Reject the selection outright if the cell already carries a mark.
    if button.get_label() in (X, O):
        return interface.set_invalid_move_state(state)

    # The human plays the identifier opposite to the agent's.
    human_mark = board.not_id(state.agent_state.identifier)
    state.board_state = board.place(state.board_state, human_mark, move)
    interface.update_board(state)

    # The human's move may have ended the game; the agent does not reply.
    if board.is_finished(state.board_state):
        interface.game_finished(state)
        return

    # Ask the agent for its reply and its updated internal state.
    agent_move, next_agent_state = agent.move(state.agent_state, state.board_state)

    # Place the reply using the agent's current identifier before swapping
    # in the new agent state.
    state.board_state = board.place(
        state.board_state, state.agent_state.identifier, agent_move
    )
    state.agent_state = next_agent_state
    interface.update_board(state)

    # Check again after the agent's move so the results are shown before the
    # user can trigger further events.
    if board.is_finished(state.board_state):
        interface.game_finished(state)
def _playout(self, n, n_moves, queue, id, info): data = {} data['max'] = -1 data['min'] = 9999999 data['media'] = 0 data['wins'] = 0 data['plays'] = 0 data['rewards'] = 0 i = 1 breaked = 0 while i <= n: if breaked > 5: break match = time.time() try: board = self.get_board() while True: if board.moves == 0 or board.is_finished(): break move = None if n_moves > 1: move = self.best_move(board, n_moves) else: moves = board.possible_moves() move = random.choice(moves) board.test_move(move[0], move[1]) if board.points > data['max']: data['max'] = board.points if board.points < data['min']: data['min'] = board.points data['media'] += board.points if board.is_finished(): data['wins'] += 1 data['rewards'] += self.get_reward(board) data['plays'] += 1 i += 1 if info: if i % 10 == 0: print("Process %d played %d times" % (id, i)) breaked = 0 except KeyboardInterrupt: exit(1) except: breaked += 1 if info: print("Bot_play #%d in Process %d failed" % (id, i)) if info: print("Process %d FINISHED" % id) queue.put(data)
def _play_training_game(player1, player2):
    """Simulate one complete game between two agents.

    ``player1`` moves first and the two agents alternate until
    ``board.is_finished`` reports that the game is over. Both agents are then
    passed to ``agent.epoch_finished`` together with the final board state so
    the rewards for the last step can be applied.

    Args:
        player1: Agent state of the side that moves first.
        player2: Agent state of the side that moves second.

    Returns:
        None. The agent states returned by ``_play_train_move`` are used for
        the following turns and for ``agent.epoch_finished`` but are not
        handed back to the caller.
    """
    # Start from an empty board.
    board_state = board.empty()

    # One slot per side; each turn replaces the slot of the player who moved
    # with the agent state returned by its move.
    players = [player1, player2]
    turn = 0
    while not board.is_finished(board_state):
        board_state, players[turn] = _play_train_move(players[turn], board_state)
        turn = 1 - turn

    # Update rewards for the final step, first player first.
    for finished_player in players:
        agent.epoch_finished(finished_player, board_state)
def _value(board_state, identifier):
    """Return the default reward of ``board_state`` for ``identifier``.

    Rewards:
        -0.5  the game is not finished yet
         1.0  ``identifier`` has won
        -1.0  the other side (``board.not_id(identifier)``) has won
         0.5  the game finished without a winner (draw)

    Args:
        board_state: Board position to evaluate.
        identifier: The side from whose point of view the reward is given.

    Returns:
        One of the float rewards listed above.
    """
    # Ordinary, non-terminal moves carry a small negative reward.
    if not board.is_finished(board_state):
        return -0.5

    # Winning has the highest reward.
    if board.is_win(board_state, identifier):
        return 1.0

    # Losing is the most negative reward.
    if board.is_win(board_state, board.not_id(identifier)):
        return -1.0

    # Finished with no winner: a draw is worth more than losing.
    return 0.5
import board
import player
import cpu

# NOTE: each module name is rebound below to an instance of the class it
# provides, so after these lines the modules themselves are no longer
# reachable under these names.
board = board.board()
player = player.player()
cpu = cpu.cpu()

# The loop keeps running for as long as board.is_finished() returns 0.
gameRunning = 0
# 0 -> the human player moves next, 1 -> the cpu moves next.
currentPlayer = 0

while gameRunning == 0:
    board.print_it()

    # Let the side whose turn it is move, then hand the turn to the other.
    (player, cpu)[currentPlayer].make_turn(board)
    currentPlayer = 1 - currentPlayer

    gameRunning = board.is_finished()

# Show the final position once the game is over.
board.print_it()