def pazaak():
    """Play one Pazaak game in the console, with MCTS picking every move.

    Both sides receive a four-card side deck whose values are drawn from
    1..6.  While the board's ``status`` is -1 (presumably "game still in
    progress" -- confirm against ``PazaakBoard.status``), the current player
    is dealt a random card unless standing, a new search tree is rooted at
    the current state, and the board returned by ``find_next_move(100)``
    replaces the current one.  The board is printed after every turn.
    """
    board = PazaakBoard()

    # Values are drawn first and wrapped in Card objects afterwards; the
    # player's deck is built before the opponent's.
    player_values = [choice(range(1, 7)) for _ in range(4)]
    player_side_deck = [Card(value) for value in player_values]
    opponent_values = [choice(range(1, 7)) for _ in range(4)]
    opponent_side_deck = [Card(value) for value in opponent_values]

    first_player = PazaakPlayer(player=1, side_deck=player_side_deck)
    second_player = PazaakPlayer(player=2, side_deck=opponent_side_deck)
    players = [first_player, second_player]

    state = PazaakState(board=board, players=players, player=players[0])

    while True:
        if board.status(players=state.players) != -1:
            break

        # A standing player does not receive a new card.
        if not state.player.stand:
            state = state.random_card()

        search = Mcts(root=Node(state=state))
        print(">>>>> CURR PLAYER: <<<<<<<", state.player.player)
        board = search.find_next_move(100)

        # Rebuild the state around the board chosen by the search.
        state = PazaakState(
            board=board,
            player=state.player,
            players=state.players,
            player_index=state.player_index,
        )
        print("TURN\n")
        board.print()
def get_next_board():
    """Advance the module-level ``app_state`` by one MCTS move.

    If the board's ``status`` is anything other than -1 the current board is
    returned unchanged.  Otherwise the current player is dealt a random card
    (unless standing), a search tree rooted at the state picks the next
    board via ``find_next_move(100)``, and ``app_state`` is rebuilt around
    that board.

    Returns:
        The board held by ``app_state`` after the (possible) update.
    """
    global app_state

    # Guard: status is no longer -1, so there is nothing to advance.
    if app_state.board.status(players=app_state.players) != -1:
        return app_state.board

    if not app_state.player.stand:
        app_state = app_state.random_card()

    search = Mcts(root=Node(state=app_state))
    # The chosen board is stored on the existing state object first, then a
    # new state is constructed from it.
    app_state.board = search.find_next_move(100)
    app_state = PazaakState(
        board=app_state.board,
        player=app_state.player,
        players=app_state.players,
        player_index=app_state.player_index,
    )
    return app_state.board
def tic_tac_toe():
    """Play one Tic-Tac-Toe game in the console, with MCTS picking every move.

    While the board's ``status()`` is -1 (presumably "game still in
    progress" -- confirm against ``TicTacBoard.status``), a new search tree
    is rooted at the current state and the board returned by
    ``find_next_move(100)`` replaces the current one.  The board is printed
    after every turn.
    """
    board = TicTacBoard()
    players = [TicTacPlayer(1), TicTacPlayer(2)]
    state = TicTacState(board=board, player=players[0], players=players)

    while True:
        if board.status() != -1:
            break

        search = Mcts(root=Node(state=state))
        print(">>>>> CURR PLAYER: <<<<<<<", state.player.player)
        board = search.find_next_move(100)

        # Rebuild the state around the board chosen by the search.
        state = TicTacState(
            board=board,
            player=state.player,
            players=state.players,
            player_index=state.player_index,
        )
        print("TURN\n")
        board.print()
4: LazyAgent } print("Welcome in Pong") selected_opponent, = input( "Select opponent for MCTS (1 - Random, 2 - Safe, 3 - Aggressive, 4 - Lazy): " ).split() game = PongGame() game = PongMonitor(game, ".", force=True) game.reset() opponent = possible_opponents[int(selected_opponent)]() mcts_agent = GreedyAgent() tree = Mcts(game, simulation_agent=mcts_agent) # tree = Mcts(game) count = 0 while not game.done: count = count + 1 start = time() tree.run(30, verbose=True) stop = time() ob = game._get_obs() # if ob is not None: # game.ale.saveScreenPNG('images/' + str(count) + '-state.png') # print(count, end=" ") # for i, val in enumerate(ob): # print(val, end=" ")
# Pong playout fragment (enclosing scope and the end of the game loop are
# outside this view; `playout`, `agent`, `game`, `opponent_names` and
# `possible_opponents` are defined elsewhere).
#
# Steps visible here:
#   1. Build the log path under './logs-defence/' from playout['method'], a
#      'no-skip'/'with-skip' marker, playout['exploration_parameter'],
#      playout['runs'], the opponent name and a timestamp, then print it.
#   2. Wrap `game` in a PongMonitor writing to that path, reset it, and
#      create a PDLogger for the same path.
#   3. Create the Mcts tree: with a GreedyAgent as simulation_agent when
#      playout['method'] == 'greedy', without one when it is 'random'.
#   4. Loop until game.done, timing each tree.run(playout['runs'], verbose=True)
#      with time() and reading game._get_obs().
#
# NOTE(review): the 'no-skip'/'with-skip' marker tests
#   playout['exploration_parameter'] is False -- this looks like it was meant
#   to test playout['skip_actions']; confirm before relying on directory names.
# NOTE(review): the file name uses playout['agent'] but the opponent is built
#   from the bare name `agent`; verify both refer to the same value.
# NOTE(review): `tree` stays None for any method other than 'greedy' or
#   'random', so tree.run(...) would raise AttributeError in that case.
filename = './logs-defence/' + playout['method'] + '-' + \ ('no-skip' if playout['exploration_parameter'] is False else 'with-skip') + \ '-' + str(playout['exploration_parameter']) + \ '/pong-' + playout['method'] + '-' + str(playout['runs']) + '-against-' + opponent_names[ playout['agent']] + '_' + datetime.now().strftime("%Y%m%d-%H%M%S") print(filename) game = PongMonitor(game, filename, force=False) game.reset() pong_logger = PDLogger(filename) opponent = possible_opponents[agent]() mcts_agent = GreedyAgent() tree = None if playout['method'] == 'greedy': tree = Mcts(game, simulation_agent=mcts_agent, logger=pong_logger, skip_actions=playout['skip_actions'], exploration_parameter=playout['exploration_parameter']) if playout['method'] == 'random': tree = Mcts(game, logger=pong_logger, skip_actions=playout['skip_actions'], exploration_parameter=playout['exploration_parameter']) count = 0 while not game.done: count = count + 1 start = time() tree.run(playout['runs'], verbose=True) stop = time() ob = game._get_obs() # if ob is not None: # game.ale.saveScreenPNG('images/' + str(count) + '-state.png')
# Nim playout fragment (enclosing scope and the rest of the game loop are
# outside this view; `playout`, `piles` and `objects` are defined elsewhere).
#
# Steps visible here:
#   1. Build the log directory './logs-nim/<runs>-<opponent>-<exploration_parameter>'
#      and a file name inside it from playout['piles'], playout['objects'],
#      playout['runs'], playout['opponent'] and a timestamp; print the name.
#   2. Create the directory (parents included, no error if it already exists).
#   3. Create the Nim game, a PDLogger for the file name, and an Mcts tree
#      using playout['exploration_parameter']; call tree.run(1) once before
#      the loop.
#   4. Loop until game.done: tree.run(playout['runs']), take tree.predict(),
#      apply it with game.act(action) and tree.move_root(action); if that move
#      ends the game, print "You won!" and set winner = 1.
#
# NOTE(review): the game is built from the bare names `piles`/`objects`
#   while the file name uses playout['piles']/playout['objects']; verify they
#   hold the same values.
file_path = './logs-nim/' + str(playout['runs']) + \ '-' + str(playout['opponent']) + \ '-' + str(playout['exploration_parameter']) filename = file_path + '/nim-' + str(playout['piles']) + '-' + str( playout['objects']) + '-' + str(playout['runs']) + '_vs_' + str( playout['opponent']) + '_' + datetime.now().strftime( "%Y%m%d-%H%M%S") print(filename) output_dir = Path(file_path) output_dir.mkdir(parents=True, exist_ok=True) game = Nim(int(piles), int(objects)) nim_logger = PDLogger(filename) tree = Mcts(game, logger=nim_logger, exploration_parameter=playout['exploration_parameter']) tree.run(1) count = 0 winner = 0 while not game.done: count = count + 1 tree.run(playout['runs']) action = tree.predict() game.act(action) tree.move_root(action) if game.done: print("You won!") winner = 1
"Set game settings (`number of piles` `number of objects`): ").split() game = Nim(int(piles), int(objects)) QL = QLearn(game) state_copy = game.get_state() QL.train(ExpertAgent()) print_rewards(QL.reward_all_ep, 100000) print() count = 0 for i in range(10): print('Try yourself against QL :)') game.set_state(state_copy, False, 0) tree = Mcts(game, exploration_parameter=1.41) tree.run(1) winner = 0 while not game.done: print(game.piles) action = QL.select_move(game) print('CPU 0 move: %s' % str(action)) game.act(action) tree.move_root(action) if game.done: winner = 1 break
from mcts.mcts import Mcts
from pong.pong_game import PongGame
from time import time
from pong.gym_agents import *
from pong.monitor import PongMonitor
from ddqn.ddqn_agent import DdqnAgent, dqn_heuristic

# Pong match on one monitored game: a DDQN agent and an MCTS tree act in
# turn, and the tree's root is moved along with every action played.
game = PongMonitor(PongGame(), ".", force=True)
game.reset()

# The tree simulates with a GreedyAgent and uses dqn_heuristic.
tree = Mcts(game, simulation_agent=GreedyAgent(), heuristic=dqn_heuristic)
ddqn_agent = DdqnAgent()

while not game.done:
    # DDQN side: act on the current observation.
    ddqn_action = ddqn_agent.act(game._get_obs())
    game.act(ddqn_action)
    tree.move_root(ddqn_action)

    # MCTS side: search, then play the predicted action.
    # NOTE(review): game.done is not re-checked between the two moves --
    # confirm that acting on a finished game is harmless.
    tree.run(5, verbose=True)
    mcts_action = tree.predict()
    game.act(mcts_action)
    tree.move_root(mcts_action)

    game.render()
# Console Nim script driven by MCTS (the view ends partway through the game
# loop, right after the second tree.predict()).
#
# Steps visible here:
#   1. Greet the user and read two whitespace-separated values,
#      `number of piles` and `number of objects`, from stdin.
#   2. Create the Nim game and an Mcts tree over it; call tree.run(1) once
#      before the loop.
#   3. Loop until game.done: print the piles, call tree.run(1200), take
#      tree.predict(), print it as "CPU 0 move", apply it with game.act(action)
#      and tree.move_root(action).  If that move ends the game, print
#      "You won!" and exit().
#   4. Otherwise print the piles again, call tree.run(1200) and take
#      tree.predict() for the next move (its handling is outside this view).
#
# The commented-out `move = input(...)` / `action = tuple(...)` lines appear
# to be a disabled manual-input path for the first move -- presumably kept
# for playing by hand; confirm before removing.
from mcts.mcts import Mcts from nim.nim import Nim print("Hello in Nim") piles, objects = input( "Set game settings (`number of piles` `number of objects`): ").split() game = Nim(int(piles), int(objects)) tree = Mcts(game) tree.run(1) while not game.done: print(game.piles) # move = input("Your move (`pile` `objects`): ").split() # action = tuple(int(x) for x in move) tree.run(1200) action = tree.predict() print('CPU 0 move: %s' % str(action)) game.act(action) tree.move_root(action) if game.done: print("You won!") exit() print(game.piles) tree.run(1200) action = tree.predict()