def args_gui(args):
    if path_exists(args.model):
        # assert os.path.exists(args.model), print("The path {} doesn't exist".format(args.model))
        if args.type == 'nn':
            net = TDGammon(hidden_units=args.hidden_units, lr=0.1, lamda=None, init_weights=False)
            env = gym.make('gym_backgammon:backgammon-v0')
        else:
            net = TDGammonCNN(lr=0.0001)
            env = gym.make('gym_backgammon:backgammon-pixel-v0')

        net.load(checkpoint_path=args.model, optimizer=None, eligibility_traces=False)

        agents = {BLACK: TDAgent(BLACK, net=net), WHITE: HumanAgent(WHITE)}
        gui = GUI(env=env, host=args.host, port=args.port, agents=agents)
        gui.run()
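# A minimal sketch of how args_gui might be invoked from the command line.
# The flag names and defaults below are assumptions inferred from the
# attributes args_gui reads (model, type, hidden_units, host, port).
import argparse

parser = argparse.ArgumentParser(description='Play backgammon against a trained TD-Gammon agent')
parser.add_argument('--model', required=True, help='path to a saved checkpoint')
parser.add_argument('--type', choices=['nn', 'cnn'], default='nn', help='network type')
parser.add_argument('--hidden_units', type=int, default=40, help='hidden units of the TD network')
parser.add_argument('--host', default='localhost', help='GUI host')
parser.add_argument('--port', type=int, default=8002, help='GUI port')
args_gui(parser.parse_args())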
# time and logging are standard-library; the MiniShogi classes and keras'
# plot_model come from the surrounding project.
import logging
import time

def play():
    rounds = 30
    white_wins = 0
    agent1 = HumanAgent()
    nnet = MiniShogiNNetWrapper()
    # nnet.nnet.model.summary()
    plot_model(nnet.nnet.model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)
    agent2 = NNetMCTSAgent(nnet, comp=False)
    print('Preparing neural net')
    # agent2.train_neural_net()
    agent2.comp = True
    agent2.nnet.load_checkpoint(filename='best.h5')
    print('Preparation complete')
    for i in range(1, rounds + 1):
        begin = time.time()
        print('Game {0}/{1}'.format(i, rounds))
        g = MiniShogiGame()
        while True:
            current_agent = agent1 if g.game_state.colour == 'W' else agent2
            current_agent.act(g)
            # print(g.game_state.print())
            logging.info(g.game_state.print_state(flip=g.game_state.colour == 'B'))
            if g.game_state.game_ended():
                if g.game_state.colour == 'B':
                    white_wins += 1
                # the same message was printed and logged; build it once
                stats = 'Stats: {0} win {1} ({2}%), {3} win {4} ({5}%)| time: {6}'.format(
                    agent1.__class__.__name__, white_wins, white_wins / i * 100,
                    agent2.__class__.__name__, i - white_wins,
                    (i - white_wins) / i * 100, time.time() - begin)
                print(stats)
                logging.info(stats)
                break
            if g.game_state.move_count > 300:  # stop very long games
                print('Game too long, terminating')
                break
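# play() records board states and per-game stats via logging.info; a minimal,
# assumed configuration (call before play()) so those records land in a file.
# The real project may configure logging elsewhere.
logging.basicConfig(filename='play.log', level=logging.INFO, format='%(asctime)s %(message)s')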
import random
import sys

# TicTacToeEnv, HumanAgent, BaseAgent, agent_by_mark and next_mark come from
# the surrounding project.
def play():
    first_move = random.randint(1, 100)
    env = TicTacToeEnv(False)
    human = HumanAgent("X")
    machine = BaseAgent("O")
    agents = [human, machine]
    start_mark = "O" if first_move % 2 == 0 else "X"
    while True:
        env.set_start_mark(start_mark)
        state = env.reset()
        board, mark = state
        done = False
        env.render()
        while not done:
            agent = agent_by_mark(agents, mark)
            is_human = isinstance(agent, HumanAgent)  # renamed from `human`, which shadowed the agent above
            env.show_turn(True, mark)
            available_actions = env.available_actions()
            if is_human:
                action = agent.act(available_actions)
                if action is None:  # the human chose to quit
                    sys.exit()
            else:
                action = agent.act(board, state, available_actions)
            state, reward, done, info = env.step(action)
            env.render(mode="human")
            if done:
                env.show_result(True, mark, reward)
                break
            else:
                board, mark = state
        start_mark = next_mark(start_mark)  # alternate the starting player each game
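# A minimal sketch of the HumanAgent interface the loop above relies on:
# act(available_actions) returns a chosen cell index, or None to quit.
# The body is an assumption; only the call signature comes from the snippet.
class HumanAgent:
    def __init__(self, mark):
        self.mark = mark

    def act(self, available_actions):
        while True:
            raw = input('Enter cell [0-8] ("q" to quit): ').strip().lower()
            if raw == 'q':
                return None
            if raw.isdigit() and int(raw) in available_actions:
                return int(raw)
            print('Invalid move, try again.')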
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('p1', choices=['r', 'h', 'c'])
parser.add_argument('p2', choices=['r', 'h', 'c'])
parser.add_argument('nrows', type=int)
parser.add_argument('ncols', type=int)
parser.add_argument('--prune', action='store_true')
parser.add_argument('--depth', type=int)
args = parser.parse_args()
# print("args:", args)

# r = random, h = human, c = computer (minimax)
players = []
for p in [args.p1, args.p2]:
    if p == 'r':
        player = RandomAgent()
    elif p == 'h':
        player = HumanAgent()
    elif p == 'c':
        if not args.depth:
            player = MinimaxAgent()
        elif not args.prune:
            player = MinimaxHeuristicAgent(args.depth)
        else:
            player = MinimaxHeuristicPruneAgent(args.depth)
    players.append(player)

start_state = GameState(args.nrows, args.ncols)
results = []
w1 = 0
w2 = 0
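# A hedged sketch of how the match loop might use the pieces set up above.
# GameState's is_full/winner/next_state and the agents' move() method are
# assumptions; only players, start_state, results, w1 and w2 come from the snippet.
state = start_state
turn = 0
while not state.is_full() and state.winner() == 0:
    move = players[turn % 2].move(state)
    state = state.next_state(move)
    turn += 1
winner = state.winner()
results.append(winner)
if winner == 1:
    w1 += 1
elif winner == 2:
    w2 += 1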
paddleB = Paddle(WHITE, 10, 100)
paddleB.rect.x = 670
paddleB.rect.y = 200

ball = Ball(WHITE, 10, 10)
ball.rect.x = 345
ball.rect.y = 195

all_sprite_list = pygame.sprite.Group()
all_sprite_list.add(paddleA)  # paddleA is created earlier in the script
all_sprite_list.add(paddleB)
all_sprite_list.add(ball)

# player agents
player_random = RandomAgent(3)
player_human = HumanAgent()

# loop while true
gameOn = True

# clock controls how fast the screen updates
clock = pygame.time.Clock()

# player scores
scoreA = 0
scoreB = 0

# main loop
while gameOn:
    # terminate if user quits
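    # A hedged sketch of the standard pygame quit-event handling the comment
    # above refers to; the rest of the loop body (paddle movement, ball physics,
    # drawing, clock.tick) is omitted from this excerpt.
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            gameOn = False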
import gym

from agents import A2CAgent, RandomAgent, HumanAgent, MCTSAgent

N_PLAYERS = 2
env = gym.make("gym_azul:azul-v0", n_players=N_PLAYERS)

# define some agents
human = HumanAgent()
mcts = MCTSAgent()  # beware, only supports 2 players
random = RandomAgent()
a2c = A2CAgent(env, hidden_dim=256)
a2c_path = 'checkpoints/12999.pt'
a2c.learning = False
if a2c_path:
    a2c.load(a2c_path)

# which agents do you want to see playing
agents = [mcts, random]

# playing loop
state = env.reset()
done = False
while not done:
    for id, agent in enumerate(agents):
        if done:
            break
        state, done = agent.play(state, env, id)

winner, score = env.get_winner()
print('Agent {} won with score {}!'.format(winner, score))
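# A sketch of the play() interface the loop above assumes: each agent gets the
# current state, the env and its player id, and returns (next_state, done).
# The class body is an assumption; only the call signature comes from the loop.
class ConsoleHumanAgent:
    def play(self, state, env, player_id):
        action = int(input('Player {} - enter an action index: '.format(player_id)))
        state, reward, done, info = env.step(action)
        return state, done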
medium_inputs = {"m", "med", "medium"} hard_inputs = {"h", "hard"} while difficulty_input not in easy_inputs.union(medium_inputs).union( hard_inputs): difficulty_input = input( "Select difficulty level: EASY (E), MEDIUM (M), HARD (H)").lower() if difficulty_input in easy_inputs: difficulty = Difficulty.EASY elif difficulty_input in medium_inputs: difficulty = Difficulty.MEDIUM elif difficulty_input in hard_inputs: difficulty = Difficulty.HARD v = vision.Vision() td_agent = TDAgent(WHITE, model, v, difficulty) human_agent = HumanAgent(BLACK, v) agents_list = [td_agent, human_agent] game = Game(agents_list) set_start_state = False if set_start_state: start_points = [[ 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ], [ 0, 0, 3, 3, 2, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]] start_bar = [0, 2] start_roll = [3, 1]
if load_from:
    agent.load(load_from)
agents.append(agent)
agents.append(MCTSAgent())

if PLAYING_MODE == 'manual':
    # 1 agent and 1 human
    assert N_PLAYERS == 2
    actor_optim = optim.Adam
    critic_optim = optim.Adam
    agent = A2CAgent(env, HIDDEN_DIM, actor_optim, critic_optim, ACTOR_LR, CRITIC_LR, GAMMA)
    if load_from:
        agent.load(load_from)
    agents.append(agent)
    agents.append(HumanAgent())

# ==================== ACTUAL TRAINING ==========================================
for ep in range(N_EPISODES):
    state = env.reset()
    done = False
    counter = 0
    print('Game {}/{}'.format(ep + 1, N_EPISODES))
    while not done:
        update = not ((counter + 1) % UPDATE_EVERY)  # True on every UPDATE_EVERY-th pass
        counter += 1
        for id, agent in enumerate(agents):
            if done:
                break
            state, done = agent.play(state, env, id)
        if update and TRAINING:
            ...  # the update step is truncated in this excerpt
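# Quick standalone check of the gating above: not ((counter + 1) % UPDATE_EVERY)
# is True exactly when counter + 1 is a multiple of UPDATE_EVERY.
UPDATE_EVERY = 4
for counter in range(8):
    print(counter + 1, not ((counter + 1) % UPDATE_EVERY))  # True at 4 and 8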