def run_one_episode(arguments):
    neural_network, environment, num_episodes, instance_id = arguments
    # Create an agent list using the neural network
    agent_list = [
        sa.RandomAgent(),
        sa.RandomAgent(),
        sa.RandomAgent(),
        sa.RandomAgent()
    ]
    env = pommerman.make(environment, agent_list)
    # Run the episodes just like OpenAI Gym
    for i_episode in range(int(num_episodes)):
        state = env.reset()
        done = False
        while not done:
            # env.render()
            actions = env.act(state)
            state, reward, done, info = env.step(actions)
        print('Episode {} finished'.format(i_episode))
    env.close()
    return
def agent_vs_random(eval_agent, player, variant="TicTacToe"):
    """
    Executes one game between eval_agent and a random player.

    Args:
        eval_agent: The AlphaZeroAgent instance.
        player: If player=-1, the agent plays as player two.
            Otherwise, the agent begins the game as player one.
        variant: The game variant. Either "TicTacToe" or "Connect4".
    """
    if variant == "TicTacToe":
        game_environment = game.TicTacToeOptimized()
        max_length = 9
    if variant == "Connect4":
        game_environment = game.Connect4Optimized()
        max_length = 42

    player_one = eval_agent
    player_two = agent.RandomAgent(eval_agent)
    if player == -1:
        player_two = eval_agent
        player_one = agent.RandomAgent(eval_agent)

    # reset game
    game_environment.reset_game()
    player_one.join_game(game_environment)
    player_two.join_game(game_environment)
    current_player = game_environment.current_player

    winning = 0
    turn = 0
    num_simulations = config.EVALUATION['num_simulations']
    while winning == 0 and turn < max_length:
        if current_player == 0:
            winning, _, _ = player_one.play_move(num_simulations, temperature=0)
        if current_player == 1:
            winning, _, _ = player_two.play_move(num_simulations, temperature=0)
        current_player = game_environment.current_player
        turn += 1

    if current_player == 0:
        winner = -1 * winning
    else:
        winner = winning
    return winner
def create_agent(agent_type, *args, **kwargs):
    if agent_type.startswith('stationary'):
        pi = None
        if agent_type.endswith(']'):
            pi = agent_type.lstrip('stationary[').rstrip(']').split(',')
            pi = [float(p) for p in pi]
        return agent.StationaryAgent(*args, **kwargs, pi=pi)
    elif agent_type == 'random':
        return agent.RandomAgent(*args, **kwargs)
    elif agent_type == 'q':
        return agent.QAgent(*args, **kwargs)
    elif agent_type == 'phc':
        return agent.PHCAgent(*args, **kwargs)
    elif agent_type == 'wolf':
        return agent.WoLFAgent(*args, **kwargs)
    elif agent_type == 'minimaxq':
        return agent.MinimaxQAgent(*args, **kwargs)
    elif agent_type == 'metacontrol':
        return agent.MetaControlAgent(*args, **kwargs)
    elif agent_type == 'littmansoccerhandcoded':
        return littmansoccer.HandCodedAgent(*args, **kwargs)
    elif agent_type.endswith('pickle'):
        return load_agent(agent_type)
    else:
        print('no such agent: {}'.format(agent_type))
        return None
def getAgent(agentType, playerNum):
    if agentType == 'human':
        return agt.HumanAgent(playerNum)
    elif agentType == 'random':
        return agt.RandomAgent()
    elif agentType == 'simple':
        return agt.SimpleAgent()
    elif agentType == 'reflex':
        return agt.ReflexAgent(playerNum)
    elif agentType == 'simple++':
        return agt.SimpleEnhancedAgent(playerNum)
def main():
    # Print all possible environments in the Pommerman registry
    print(pommerman.registry)

    # Create a set of agents (exactly four)
    agent_list = [
        # agents.SimpleAgent(),
        # agents.SimpleAgent(),
        # agents.SimpleAgent(),
        # agents.SimpleAgent(),
        sa.RandomAgent(),
        sa.RandomAgent(),
        sa.RandomAgent(),
        sa.RandomAgent()
        # agents.DockerAgent("pommerman/simple-agent", port=12345),
    ]

    # Make the "Free-For-All" environment using the agent list
    # ['PommeFFACompetition-v0', 'PommeFFACompetitionFast-v0', 'PommeFFAFast-v0',
    #  'PommeFFA-v1', 'PommeRadio-v2', 'PommeTeamCompetition-v0', 'PommeTeam-v0',
    #  'PommeTeamFast-v0']
    # env = pommerman.make('PommeFFACompetitionFast-v0', agent_list)
    env = pommerman.make('PommeFFACompetition-v0', agent_list)
    # env = pommerman.make('PommeTeamCompetition-v0', agent_list)

    # Run the episodes just like OpenAI Gym
    for i_episode in range(1):
        state = env.reset()
        done = False
        while not done:
            # env.render()
            actions = env.act(state)
            print("Actions every step ", actions)
            state, reward, done, info = env.step(actions)
            print("One step ")
            # print("State ", state)  # Array of 4 states
            print("Reward ", reward)  # -1, 0, 1
            # print("Done ", done)    # False/True
            # print("Info ", info)    # {'result': <Result.Win: 0>, 'winners': [3]}
        print('Episode {} finished'.format(i_episode))
    # for i, al in enumerate(agent_list):
    #     print("Agent Idx ", i, al.get_state_timline())
    env.close()
def main():
    # read configuration file or parse arguments
    args = parser.ParseArgs()
    cfg = {
        "learning_rate": 0.1,
        "activation": None,
    }
    # saved configuration parameters
    # args: env, multiprocess, hyperparameters
    cpu = args.cpu        # bool
    env_id = args.env_id  # name-version

    # create environment and recorder
    env = vector.make(env_id, 1, asynchronous=False, wrappers=None)
    env = wrappers.Monitor(env,
                           directory='/tmp/results',
                           video_callable=None,
                           force=False,
                           resume=False,
                           write_upon_reset=False,
                           uid=None,
                           mode=None)
    env.seed(0)

    # create policy/agent
    agent = ag.RandomAgent(env.action_space)

    # create optimizer and set hyperparameters
    model = model.MultiLayerPerc(cfg, env.ob_space, env.action_space)
    optimizer = op.ces(cfg)

    # set level of log information
    logger.set_level(logger.INFO)

    # initialize
    episode_count = 100
    reward = 0
    cum_reward = 0
    render = False
    done = False

    # rollout
    for i in range(episode_count):
        ob = env.reset()
        while True:
            action = agent.act(ob, reward, done)
            ob, reward, done, _ = env.step(action)
            cum_reward += reward
            if render and i % 10 == 0:
                env.render()
            if done:
                break

    env.close()
def run_test_suite(ABPlayer, depth, time, x, y, n):
    seed = 1234
    random.seed(seed)
    num_games = 25  # avoid shadowing the built-in max()
    AlphaBetaVictories = 0
    for i in range(0, num_games):
        #
        # Random vs. AlphaBeta
        #
        goSecond = game.Game(
            x,  # width
            y,  # height
            n,  # tokens in a row to win
            agent.RandomAgent("random"),             # player 1
            aba.AlphaBetaAgent("alphabeta", depth))  # player 2
        #
        # AlphaBeta vs. Random
        #
        goFirst = game.Game(
            x,  # width
            y,  # height
            n,  # tokens in a row to win
            aba.AlphaBetaAgent("alphabeta", depth),  # player 1
            agent.RandomAgent("random"))             # player 2

        if ABPlayer == 1:
            outcome = goFirst.timed_go(time)
            if outcome == 1:
                AlphaBetaVictories += 1
        else:
            outcome = goSecond.timed_go(time)
            if outcome == 2:
                AlphaBetaVictories += 1

        seed += random.randint(1, 100)
        # print("RANDOM SEED: " + str(seed))
        random.seed(seed)
        print("Game " + str(i) + " complete")

    print("AlphaBeta won " + str(AlphaBetaVictories) + " out of " + str(num_games))
def create_player_list(args):
    # Only board_params and players are needed from args
    board_params = args["board_params"]
    list_players = []
    for i, player_args in enumerate(args["players"]):
        kwargs = removekey(player_args, "agent")
        if player_args["agent"] == "RandomAgent":
            list_players.append(agent.RandomAgent(f"Random_{i}"))
        elif player_args["agent"] == "PeacefulAgent":
            list_players.append(agent.PeacefulAgent(f"Peaceful_{i}"))
        elif player_args["agent"] == "FlatMCPlayer":
            list_players.append(agent.FlatMCPlayer(name=f'flatMC_{i}', **kwargs))
        elif player_args["agent"] == "UCTPlayer":
            list_players.append(agent.UCTPlayer(name=f'UCT_{i}', **kwargs))
        elif player_args["agent"] == "PUCTPlayer":
            world = World(board_params["path_board"])
            board = Board(world,
                          [agent.RandomAgent('Random1'), agent.RandomAgent('Random2')])
            board.setPreferences(board_params)
            puct = load_puct(board, player_args)
            list_players.append(puct)
        elif player_args["agent"] == "NetPlayer":
            world = World(board_params["path_board"])
            board = Board(world,
                          [agent.RandomAgent('Random1'), agent.RandomAgent('Random2')])
            board.setPreferences(board_params)
            netPlayer = load_NetPlayer(board, player_args)
            list_players.append(netPlayer)
        elif player_args["agent"] == "Human":
            hp_name = player_args["name"] if "name" in player_args else "human"
            hp = agent.HumanAgent(name=hp_name)
            list_players.append(hp)
    return list_players
def __battle_random_agent(self):
    executor = concurrent.futures.ProcessPoolExecutor()
    for index in range(0, self.__NUMBER_INDIVIDUALS):
        waiting_queue = []
        for times in range(0, self.__NUMBER_BATTLES):
            first = self.__now_generation[index][0].copy()
            second = agent.RandomAgent()
            game = game_board.GameBoard(first, second)
            waiting_queue.append(executor.submit(game.game_start))
        for end_task in concurrent.futures.as_completed(waiting_queue):
            self.__progress_bar.update(1)
            if end_task.result() == -1:
                self.__now_generation[index][1] += 2
            elif end_task.result() == 2:
                self.__now_generation[index][1] += 1
    executor.shutdown()
def test_cost(layout, circle, dices=None):
    C_th, _ = value_iteration.markovDecision(layout, circle, actions=dices)
    if dices is None:
        pi = agent.OptimalAgent(layout, circle)
    else:
        pi = agent.RandomAgent(dices)
    C_sim = simulation.estimate_cost(layout, circle, pi, n_episodes=int(1e3))
    passed = np.allclose(C_th, C_sim, atol=0.1)
    if not passed:
        print("Not the same expected cost:")
        print("Th: ", *["{:.2f}".format(c) for c in C_th])
        print("Sim:", *["{:.2f}".format(c) for c in C_sim])
    else:
        print("OK")
    return passed
def playGames(agent_b, agent_w=agent.RandomAgent(), max_epochs=100):
    w_win = 0
    b_win = 0
    scores = []
    for i_episode in range(max_epochs):
        observation = env.reset()
        while True:
            ################### Black (B) ############################# 0 means black
            # Black's move
            action = [65, 0]
            enables = env.possible_actions
            if len(enables) == 0:
                action[0] = env.board_size ** 2 + 1
            else:
                action[0] = agent_b.place(observation, enables, 0)  # 0 means black
            observation, reward, done, info = env.step(action)

            ################### White (W) ############################# 1 means white
            # White's move
            action = [65, 1]
            enables = env.possible_actions
            # if there is nothing to do, pass
            if len(enables) == 0:
                action[0] = env.board_size ** 2 + 1  # pass
            else:
                action[0] = agent_w.place(observation, enables, 1)
            observation, reward, done, info = env.step(action)

            ################## GAME OVER ##############################
            if done:  # game over
                # env.render()
                # Note: counting stones this way can be misleading because the board
                # may not be completely filled
                black_score = len(np.where(env.state[0, :, :] == 1)[0])
                white_score = len(np.where(env.state[1, :, :] == 1)[0])
                if black_score > white_score:
                    b_win += 1
                else:
                    w_win += 1
                scores.append((black_score, white_score))
                break
def test_experiment_shoebox():
    """
    Testing a run with ShoeBox room

    TODO
    """
    # Shoebox Room
    room = room_types.ShoeBox(x_length=10, y_length=10)
    agent_loc = np.array([3, 8])

    # Set up the gym environment
    env = gym.make(
        "audio-room-v0",
        room_config=room.generate(),
        agent_loc=agent_loc,
        corners=room.corners,
        max_order=10,
        step_size=1.0,
        acceptable_radius=0.8,
    )

    # create buffer data folders
    utils.create_buffer_data_folders()

    tfm = nussl.datasets.transforms.Compose([
        nussl.datasets.transforms.GetAudio(mix_key='new_state'),
        nussl.datasets.transforms.ToSeparationModel(),
        nussl.datasets.transforms.GetExcerpt(excerpt_length=32000,
                                             tf_keys=['mix_audio'],
                                             time_dim=1),
    ])

    # create dataset object (subclass of nussl.datasets.BaseDataset)
    dataset = BufferData(folder=constants.DIR_DATASET_ITEMS,
                         to_disk=True,
                         transform=tfm)

    # Load the agent class
    a = agent.RandomAgent(env=env,
                          dataset=dataset,
                          episodes=2,
                          max_steps=10,
                          plot_reward_vs_steps=False)
    a.fit()
def main(args=None):
    from optparse import OptionParser
    usage = "usage: %prog [options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-p", "--player1", dest="player1", default="random",
                      help="Choose type of first player")
    (opts, args) = parser.parse_args(args)

    evalArgs = load_weights()
    evalFn = aiAgents.nnetEval

    p1 = None
    if opts.player1 == 'random':
        p1 = agent.RandomAgent(game.Game.TOKENS[0])
    elif opts.player1 == 'reflex':
        p1 = aiAgents.TDAgent(game.Game.TOKENS[0], evalArgs)
    elif opts.player1 == 'expectiminimax':
        p1 = aiAgents.ExpectiMiniMaxAgent(game.Game.TOKENS[0], evalFn, evalArgs)
    elif opts.player1 == 'human':
        p1 = agent.HumanAgent(game.Game.TOKENS[0])
    # p2 = agent.RandomAgent(game.Game.TOKENS[1])
    p2 = aiAgents.ExpectiMiniMaxAgent(game.Game.TOKENS[1], evalFn, evalArgs)

    if p1 is None:
        print("Please specify a legitimate player")
        import sys
        sys.exit(1)

    play([p1, p2])
def main(args):
    if not os.path.isdir(args.log_dir):
        os.makedirs(args.log_dir)
    if args.save_dir and not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)
    if args.seed:
        random.seed(args.seed)
        np.random.seed(args.seed)

    basename = envs.BASENAMES[args.environment]
    env_name = '{basename}-{scenario}-{dataset}-v0'.format(
        basename=basename, scenario=args.scenario, dataset=args.dataset)
    env = gym.make(env_name)
    print("Environment {}".format(args.environment))
    print(env.observation_space)
    print(env.action_space)

    if args.features == 'net':
        extractor = feature_extractor.NetFeatureExtractor()
    elif args.features == 'hash':
        extractor = feature_extractor.ImageHashExtractor()
    else:
        raise NotImplementedError(
            "Unknown feature extraction method '{}'".format(args.features))

    if args.agent == "baseline":
        # Special case -> handled somewhere else
        baseline(env, args)
        return
    elif args.agent == "bandit":
        actor = agent.BanditAgent(env, extractor)  # VW
    elif args.agent == "random":
        actor = agent.RandomAgent(env)

    obs = env.reset()
    print(obs)

    original_score = 0.0
    modified_score = 0.0
    totalreward = 0.0
    totalsuccess = 0.0
    iter_duration = 0.0

    statistics = {}
    for idx in range(env.action_space.n):
        statistics[env.actions[idx][0]] = {
            'action': env.actions[idx][0],
            'count': 0,
            'reward': 0.0,
            'success': 0
        }
        if env.is_hierarchical_action(idx):
            params = env.actions[idx][1]
            for param_idx in range(env.hierarchical_actions[idx]['space'].n):
                statistics[params[param_idx][0]] = {
                    'action': params[param_idx][0],
                    'count': 0,
                    'reward': 0.0,
                    'success': 0
                }

    log_file = '{timestamp}-{env}-{sc}-{agent}.json'.format(
        timestamp=datetime.now().strftime("%Y%m%d%H%M%S"),
        env=args.environment,
        sc=args.scenario,
        agent=args.agent)
    log_file = os.path.join(args.log_dir, log_file)

    for iteration in range(1, args.iterations + 1):
        start = time.time()
        act = actor.act(obs)
        obs, reward, done, info = env.step(act)
        actor.update(reward, done=done)
        iter_duration += time.time() - start

        action_name, param_name = env.get_action_name(act[0], act[1])
        statistics[action_name]['count'] += 1
        statistics[action_name]['reward'] += reward[0]
        statistics[action_name]['success'] += reward[0] > 0
        if param_name:
            statistics[param_name]['count'] += 1
            statistics[param_name]['reward'] += reward[1]
            statistics[param_name]['success'] += reward[1] > 0

        original_score += info['original_score']
        modified_score += info['modified_score']
        totalreward += reward[0]
        totalsuccess += reward[0] > 0

        if done:
            obs = env.reset()

        if (iteration % args.log_interval == 0) or iteration == args.iterations:
            stat_string = ' | '.join([
                "{:.2f} ({:.2f}/{:d})".format(
                    v['success'] / (v['count'] + 1e-10), v['success'], v['count'])
                for v in statistics.values()
            ])
            print("i = {}".format(iteration),
                  round(totalsuccess / iteration, 2),
                  round(original_score / iteration, 2),
                  round(modified_score / iteration, 2),
                  '\t', stat_string)
            log_dict = {
                'env': args.environment,
                'scenario': args.scenario,
                'agent': args.agent,
                'iteration': iteration,
                'totalreward': totalreward,
                'success': totalsuccess,
                'statistics': statistics,
                'original_accuracy': float(original_score) / iteration,
                'modified_accuracy': float(modified_score) / iteration,
                'duration': iter_duration / iteration
            }
            open(log_file, 'a').write(json.dumps(log_dict) + os.linesep)
def search(self, state, depth, use_val=False):
    # print("\n\n-------- SEARCH --------")
    # print(f"depth: {depth}")
    # state.report()

    # Is terminal? Return a vector with one score per player
    if isTerminal(state) or depth > self.max_depth:
        # print("\n\n-------- TERMINAL --------")
        return score_players(state), score_players(state)

    # Active player is dead, then end turn
    while not state.activePlayer.is_alive:
        state.endTurn()
        if state.gameOver:
            return score_players(state), score_players(state)

    s = hash(state)

    # Is leaf?
    if s not in self.Ps:
        canon, map_to_orig = state.toCanonical(state.activePlayer.code)
        batch = torch_geometric.data.Batch.from_data_list([boardToData(canon)])
        mask, moves = maskAndMoves(canon, canon.gamePhase, batch.edge_index)
        if self.apprentice is not None:
            policy, value = self.apprentice.play(canon)
        else:
            # No bias, just uniform sampling for the moment
            policy, value = torch.ones_like(mask) / max(mask.shape), torch.zeros((1, 6))
        policy = policy * mask
        self.Vs[s], self.As[s] = mask.squeeze(), moves
        self.Ps[s] = policy.squeeze()
        self.Ns[s] = 1

        # Return an evaluation
        v = np.zeros(6)
        for _ in range(self.sims_per_eval):
            sim = copy.deepcopy(state)
            sim.simulate(agent.RandomAgent())
            v += score_players(sim)
        v /= self.sims_per_eval

        # Fix order of value returned by net
        value = value.squeeze()
        # Apprentice already does this
        # cor_value = torch.FloatTensor([value[map_to_orig.get(i)]
        #                                if not map_to_orig.get(i) is None else 0.0
        #                                for i in range(6)])
        cor_value = value
        return v, cor_value

    # Not a leaf, keep going down. Use values for the current player
    p = state.activePlayer.code
    action = -1
    bestScore = -float('inf')
    # print("Valid:")
    # print(self.Vs[s])
    for i, act in enumerate(self.As[s]):
        a = hash(act)
        # print(i, act)
        if self.Vs[s][i] > 0.0:
            if (s, a) in self.Rsa:
                # PUCT formula
                uct = self.Rsa[(s, a)][p] + self.cb * np.sqrt(
                    np.log(self.Ns[s]) / max(self.Nsa[(s, a)], self.eps))
                val = self.wb * self.Qsa[(s, a)] * (use_val)
                pol = self.wa * self.Ps[s][i] / (self.Nsa[(s, a)] + 1)
                sc = uct + pol + val[p]
            else:
                # Unseen action, take it
                action = act
                break
            if sc > bestScore:
                bestScore = sc
                action = act

    if isinstance(action, int) and action == -1:
        print("**** No move?? *****")
        state.report()
        print(self.As[s])
        print(self.Vs[s])

    # print('best: ', action)
    a = hash(action)
    # Best action in simplified way
    move = buildMove(state, action)

    # Play action, continue search
    # TODO: For now, armies are placed on one country only to simplify the game
    # print(move)
    state.playMove(move)
    v, net_v = self.search(state, depth + 1, use_val)
    if isinstance(net_v, torch.Tensor):
        net_v = net_v.detach().numpy()
    if isinstance(v, torch.Tensor):
        v = v.detach().numpy()

    if (s, a) in self.Rsa:
        rsa, qsa, nsa = self.Rsa[(s, a)], self.Qsa[(s, a)], self.Nsa[(s, a)]
        self.Rsa[(s, a)] = (nsa * rsa + v) / (nsa + 1)
        self.Qsa[(s, a)] = (nsa * qsa + net_v) / (nsa + 1)
        self.Nsa[(s, a)] += 1
    else:
        self.Rsa[(s, a)] = v
        self.Qsa[(s, a)] = net_v
        self.Nsa[(s, a)] = 1

    self.Ns[s] += 1
    return v, net_v
# Calculate and print scores
sscores = sorted(((v, k.name) for k, v in scores.items()), reverse=True)
print("\nSCORES:")
for v, k in sscores:
    print(v, k)

#######################
# Run the tournament! #
#######################

# Set random seed for reproducibility
random.seed(1)

# Construct list of agents in the tournament
agents = [
    # aba.AlphaBetaAgent("aba", 4),
    agent.RandomAgent("random1"),
    agent.RandomAgent("random2"),
    agent.RandomAgent("random3"),
    agent.RandomAgent("random4")
]

# Run!
play_tournament(
    7,       # board width
    6,       # board height
    4,       # tokens in a row to win
    15,      # time limit in seconds
    agents)  # player list
# GET THE AGENT
###########################
import agent

a = None
if opts.agent == 'value':
    a = agent.ValueIterationAgent(mdp, opts.discount, opts.iters)
elif opts.agent == 'q':
    a = agent.QLearningAgent(env.getPossibleActions, opts.discount,
                             opts.learningRate, opts.epsilon)
elif opts.agent == 'random':
    # No reason to use the random agent without episodes
    if opts.episodes == 0:
        opts.episodes = 1
    a = agent.RandomAgent(mdp.getPossibleActions)
else:
    raise Exception('Unknown agent type: ' + opts.agent)

###########################
# RUN EPISODES
###########################
print(opts.agent)

# DISPLAY Q/V VALUES BEFORE SIMULATION OF EPISODES
if opts.agent == 'value':
    display.displayValues(a, message="VALUES AFTER " + str(opts.iters) + " ITERATIONS")
    display.pause()
    display.displayQValues(a, message="Q-VALUES AFTER " + str(opts.iters) +
def main(args=None):
    import sys
    print("Please choose the type of agent: human, TDagent or random")
    line = sys.stdin.readline()

    from optparse import OptionParser
    usage = "usage: %prog [options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-d", "--draw", dest="draw", action="store_true",
                      default=False, help="Draw game")
    parser.add_option("-n", "--num", dest="numgames", default=1,
                      help="Num games to play")
    parser.add_option("-p", "--player1", dest="player1",
                      default=str(line.strip()),
                      help="Choose type of first player")
    parser.add_option("-e", "--eval", dest="eval", action="store_true",
                      default=True,
                      help="Play with the better eval function for player")
    (opts, args) = parser.parse_args(args)

    weights = None
    weights1 = None
    if opts.eval:
        weights, weights1 = load_weights(weights, weights1)
    evalArgs = weights
    evalArgs1 = weights1
    evalFn = aiAgents.nnetEval

    print("The chosen agent is: " + str(opts.player1))
    p1 = None
    if str(opts.player1) == 'random':
        p1 = agent.RandomAgent(game.Game.TOKENS[0])
        # print(p1)
    elif opts.player1 == 'TDagent':
        p1 = aiAgents.TDAgent(game.Game.TOKENS[0], evalArgs1)
    elif opts.player1 == 'expectimax':
        p1 = aiAgents.ExpectimaxAgent(game.Game.TOKENS[0], evalFn, evalArgs)
    elif opts.player1 == 'expectiminimax':
        p1 = aiAgents.ExpectiMiniMaxAgent(game.Game.TOKENS[0], evalFn, evalArgs)
    elif opts.player1 == 'human':
        p1 = agent.HumanAgent(game.Game.TOKENS[0])

    p2 = aiAgents.TDAgent(game.Game.TOKENS[1], evalArgs)
    # p2 = aiAgents.ExpectiMiniMaxAgent(game.Game.TOKENS[1], evalFn, evalArgs)

    if p1 is None:
        print("Please specify a legitimate player")
        sys.exit(1)

    if opts.player1 == 'random':
        test([p1, p2], numGames=int(opts.numgames), draw=opts.draw)
        print("o is random")
        print("x is the agent")
    if opts.player1 == 'TDagent':
        # test([p1, p2], numGames=int(opts.numgames), draw=opts.draw)
        play([p1, p2])
    if opts.player1 == 'human':
        play([p1, p2])
        print("o is td(0)")
        print("x is the agent td(0.5)")
expert_mcts_sims = inputs["expert_mcts_sims"]
path_data = inputs["path_data"]
path_model = inputs["path_model"]
batch_size = inputs["batch_size"]
model_args = read_json(inputs["model_parameters"])
path_board = inputs["path_board"]

# ---------------- Model -------------------------

#%%% Create Board
world = World(path_board)

# Set players
pR1, pR2, pR3 = (agent.RandomAgent('Red'),
                 agent.RandomAgent('Blue'),
                 agent.RandomAgent('Green'))
players = [pR1, pR2, pR3]

# Set board
# TODO: Send to inputs
prefs = {
    'initialPhase': True,
    'useCards': True,
    'transferCards': True,
    'immediateCash': True,
    'continentIncrease': 0.05,
    'pickInitialCountries': True,
    'armiesPerTurnInitial': 4,
    'console_debug': False
}
def testRandomAgent(self):
    rs = np.random.RandomState(seed=1)
    env = gw.Grid2D(np.array([[1, 2, 3], [2, 3, 4], [3, 4, 5]]), r=rs)
    ra = a.RandomAgent(env, start=[1, 1], r=rs)
PLOT_FREQUENCY = 500
BATCH_SIZE = 1024  # for faster training take a smaller batch size, but not too small as batchnorm will not work otherwise
GAMMA = 0.9        # already favors reaching the goal faster, no need for reward_step; the lower GAMMA, the faster
EPS_START = 0.9    # for unstable models take higher randomness first
EPS_END = 0.01
EPS_DECAY = 2000
N_SMOOTH = 500     # plotting scores averaged over this number of episodes
VERBOSE = 1        # level of printed output verbosity:
                   # 1: plot averaged episode stats
                   # 2: also print actions taken and rewards
                   # 3: every 100 episodes run_env()
                   # also helpful sometimes: printing probabilities in "select_action" function of agent

num_episodes = 100000  # training for how many episodes

agent0 = agent.Stratego(0)
agent1 = agent.RandomAgent(1)
# agent1 = agent.Random(1)
# agent1.model = agent0.model  # if you want to train by self-play

env__ = teacher.Trainer(agent0, agent1, False, "custom", [0, 1])
env_name = "stratego"
model = env__.agents[0].model  # optimize model of agent0
model = model.to(device)
optimizer = optim.Adam(model.parameters())
memory = utils.ReplayMemory(10000)

# model.load_state_dict(torch.load('./saved_models/{}_current.pkl'.format(env_name)))  # trained against Random

train(env__, num_episodes)

# model.load_state_dict(torch.load('./saved_models/{}.pkl'.format(env_name)))  # trained against Random
run_env(env__, 10000)
import random
import game
import agent
import alpha_beta_agent as aba

# Set random seed for reproducibility
random.seed(1)

#
# Random vs. Random
#
g = game.Game(
    7,  # width
    6,  # height
    4,  # tokens in a row to win
    agent.RandomAgent("random1"),  # player 1
    agent.RandomAgent("random2"))  # player 2

#
# Human vs. Random
#
# g = game.Game(7, # width
#               6, # height
#               4, # tokens in a row to win
#               agent.InteractiveAgent("human"), # player 1
#               agent.RandomAgent("random")) # player 2

#
# Random vs. AlphaBeta
#
# g = game.Game(7, # width
def __getattr__(self, name):
    return self[name]


args = dotdict({
    'max_epochs': 100,
    'play_turns': 100,
    'checkpoint': './checkpoint/',
    'load_model': False,
    'load_folder': '',
})

if __name__ == "__main__":
    w_win = 0
    b_win = 0
    agent_b = agent.RandomAgent()
    agent_w = agent.GreedyAgent()
    for i_episode in range(args.max_epochs):
        observation = env.reset()
        # observation is a 3 x 8 x 8 list representing the current board;
        # see `state` in reversi.py for the exact definition
        for t in range(args.play_turns):
            action = [65, 0]
            # action holds two integers: action[0] is the position to play,
            # action[1] is the color to play (black = 0, white = 1)
            ################### Black (B) ############################# 0 means black
            # Black's move
            # env.render()  # print the current board
            enables = env.possible_actions
            if len(enables) == 0:
                action[0] = env.board_size**2 + 1
# def _convert_point_coord_to_move(self, pointx: int, pointy: int) -> None:
#     ''' Converts canvas point to a move that can be inputted in the othello game '''
#     row = int(pointy // self._board.get_cell_height())
#     if row == self._board.get_rows():
#         row -= 1
#     col = int(pointx // self._board.get_cell_width())
#     if col == self._board.get_columns():
#         col -= 1
#     return (row, col)

def _on_board_resized(self, event: tkinter.Event) -> None:
    ''' Called whenever the canvas is resized '''
    self._board.redraw_board()


if __name__ == '__main__':
    black_wins = 0
    white_wins = 0
    for i in range(20):
        gui = OthelloGUI(agent.AlphaBetaAgent(), agent.RandomAgent())
        gui.start()
        winner = gui.findWinner()
        if winner == 'B':
            black_wins = black_wins + 1
        if winner == 'W':
            white_wins = white_wins + 1
    print("black wins:")
    print(black_wins)
    print("white wins:")
    print(white_wins)
path_board = board_params["path_board"]
epochs = inputs["epochs"]
eval_every = inputs["eval_every"]

# ---------------- Load model -------------------------
move_types = ['initialPick', 'initialFortify', 'startTurn', 'attack', 'fortify']

#%%% Create Board
world = World(path_board)

# Set players
pR1, pR2 = agent.RandomAgent('Red'), agent.RandomAgent('Blue')
players = [pR1, pR2]

# Set board
# TODO: Send to inputs
prefs = board_params

board_orig = Board(world, players)
board_orig.setPreferences(prefs)

num_nodes = board_orig.world.map_graph.number_of_nodes()
num_edges = board_orig.world.map_graph.number_of_edges()

if verbose:
    print("\t\ttrain_model: Creating model")
net = GCN_risk(
    num_nodes, num_edges,
    model_args['board_input_dim'], model_args['global_input_dim'],
    model_args['hidden_global_dim'],
#
# g = game.Game(7, # width
#               6, # height
#               4, # tokens in a row to win
#               agent.InteractiveAgent("human"), # player 1
#               agent.RandomAgent("random")) # player 2

#
# Random vs. AlphaBeta
#
g = game.Game(
    10,  # width
    8,   # height
    5,   # tokens in a row to win
    agent.RandomAgent("random"),         # player 1
    aba.AlphaBetaAgent("alphabeta", 4))  # player 2

#
# Human vs. AlphaBeta
#
# g = game.Game(7, # width
#               6, # height
#               4, # tokens in a row to win
#               agent.InteractiveAgent("human"), # player 1
#               aba.AlphaBetaAgent("alphabeta", 4)) # player 2

#
# Human vs. Human
#
# g = game.Game(7, # width
def train(args):
    w_atk = np.random.normal(0, 1e-2, (util.NUM_FEATURES, ))
    w_def = np.random.normal(0, 1e-2, (util.NUM_FEATURES, ))
    w_atk[-1] = 0
    w_def[-1] = 0

    agents = [getAgent(args.agent, 0), getAgent(args.agent, 1)]
    for agent in agents:
        agent.setAttackWeights(w_atk)
        agent.setDefendWeights(w_def)

    g = dk.Durak()
    for i in range(args.numGames):
        attacker = g.getFirstAttacker()
        defender = int(not attacker)
        while True:
            preAttack = None
            preDefend = None
            while True:
                preAttack = g.getState(attacker)
                attack(g, attacker, agents[attacker])
                postAttack = g.getState(defender)
                if g.roundOver():
                    break
                elif preDefend is not None:
                    w_def = TDUpdate(preDefend, postAttack, 0, w_def)
                    for agent in agents:
                        agent.setDefendWeights(w_def)
                preDefend = postAttack
                defend(g, defender, agents[defender])
                postDefend = g.getState(attacker)
                if g.roundOver():
                    break
                else:
                    w_atk = TDUpdate(preAttack, postDefend, 0, w_atk)
                    for agent in agents:
                        agent.setAttackWeights(w_atk)

            if g.gameOver():
                if g.isWinner(attacker):
                    w_atk = TDUpdate(g.getState(attacker), None, 1, w_atk)
                    w_def = TDUpdate(g.getState(defender), None, 0, w_def)
                else:
                    w_def = TDUpdate(g.getState(defender), None, 1, w_def)
                    w_atk = TDUpdate(g.getState(attacker), None, 0, w_atk)
                for agent in agents:
                    agent.setAttackWeights(w_atk)
                    agent.setDefendWeights(w_def)
                break

            g.endRound()
            # Edge case: the defender from the last round won
            if g.gameOver():
                w_def = TDUpdate(g.getState(defender), None, 1, w_def)
                w_atk = TDUpdate(g.getState(attacker), None, 0, w_atk)
                for agent in agents:
                    agent.setDefendWeights(w_def)
                    agent.setAttackWeights(w_atk)
                break
            else:
                w_def = TDUpdate(preDefend, g.getState(defender), 0, w_def)
                w_atk = TDUpdate(preAttack, g.getState(attacker), 0, w_atk)
                for agent in agents:
                    agent.setDefendWeights(w_def)
                    agent.setAttackWeights(w_atk)

            attacker = g.attacker
            defender = int(not attacker)

        if i % 50 == 0:
            print('Training iteration: %d / %d' % (i, args.numGames))
            randomAgent = agt.RandomAgent()
            simpleAgent = agt.SimpleAgent()
            winCounts = {'random': 0, 'simple': 0}
            for _ in range(500):
                winVsRandom = play(dk.Durak(), [randomAgent, agents[0]])
                winVsSimple = play(dk.Durak(), [simpleAgent, agents[0]])
                winCounts['random'] += winVsRandom
                winCounts['simple'] += winVsSimple
            with open('results.csv', 'a') as f:
                row = [i, winCounts['random'], winCounts['simple']]
                row.extend(w_atk)
                row.extend(w_def)
                np.savetxt(f, np.array(row)[:, None].T, delimiter=',', fmt='%.4e')
            # save weights (pickle needs binary mode)
            with open('%s_attack_%d.bin' % (args.agent, i), 'wb') as f_atk:
                pickle.dump(w_atk, f_atk)
            with open('%s_defend_%d.bin' % (args.agent, i), 'wb') as f_def:
                pickle.dump(w_def, f_def)

        g.newGame()

    with open('%s_attack.bin' % args.agent, 'wb') as f_atk:
        pickle.dump(w_atk, f_atk)
    with open('%s_defend.bin' % args.agent, 'wb') as f_def:
        pickle.dump(w_def, f_def)
    return w_atk, w_def
#######################
# Run the tournament! #
#######################

# Set random seed for reproducibility
random.seed(1)

# GAME CONFIGURATION
depth = 4
tokens_to_win = 4
time_limit = 15

for i in range(1):
    random.seed(i)
    # Run!
    play_tournament(
        7,              # board width
        6,              # board height
        tokens_to_win,  # tokens in a row to win
        time_limit,     # time limit in seconds
        [
            aba.AlphaBetaAgent("New AI", depth, tokens_to_win),
            oaba.OldAlphaBetaAgent("Old AI", depth, tokens_to_win),
            agent.RandomAgent("random1"),
            # agent.RandomAgent("random2"),
            # agent.RandomAgent("random3"),
            # agent.RandomAgent("random4"),
        ])  # player list
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description='2048 Game w/ AI')
    parser.add_argument('-a', '--agent', type=str,
                        help='name of agent (Random or Expectimax)')
    parser.add_argument('-d', '--depth', type=int, default=2, help='depth')
    parser.add_argument('-g', '--goal', type=int, default=4086,
                        help='Goal end of game, Default: 2048')
    parser.add_argument('--no-graphics', action='store_true',
                        help='no graphics (only works when AI specified)')
    args = parser.parse_args()

    Agent = None
    graphics = True
    if args.agent == 'RandomAgent':
        Agent = agent.RandomAgent()
    elif args.agent == 'Depth_limited_Expectimax_Agent':
        Agent = agent.Depth_limited_Expectimax_Agent(depth=args.depth)
    elif args.agent == 'Customized_Expectimax_Agent':
        Agent = agent.Customized_Expectimax_Agent(depth=args.depth)

    # =========================================================================
    # DEMO
    # =========================================================================
    # Agent = agent.RandomAgent()
    # Agent = agent.Basic_Expectimax_Agent()
    Agent = agent.Depth_limited_Expectimax_Agent(depth=2)
    # Agent = agent.Customized_Expectimax_Agent(depth=2)
    # =========================================================================
def main(args=None):
    from optparse import OptionParser
    usage = "usage: %prog [options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-t", "--train", dest="train", action="store_true",
                      default=False, help="Train TD Player")
    parser.add_option("-d", "--draw", dest="draw", action="store_true",
                      default=False, help="Draw game")
    parser.add_option("-n", "--num", dest="numgames", default=1,
                      help="Num games to play")
    parser.add_option("-p", "--player1", dest="player1", default="random",
                      help="Choose type of first player")
    parser.add_option("-e", "--eval", dest="eval", action="store_true",
                      default=False,
                      help="Play with the better eval function for player")
    (opts, args) = parser.parse_args(args)

    weights = None
    if opts.train:
        weights = train()
    if opts.eval:
        weights = load_weights(weights)
        evalFn = submission.logLinearEvaluation
        evalArgs = weights
    else:
        evalFn = submission.simpleEvaluation
        evalArgs = None

    p1 = None
    if opts.player1 == 'random':
        p1 = agent.RandomAgent(game.Game.TOKENS[0])
    elif opts.player1 == 'reflex':
        p1 = submission.ReflexAgent(game.Game.TOKENS[0], evalFn, evalArgs)
    elif opts.player1 == 'expectimax':
        p1 = submission.ExpectimaxAgent(game.Game.TOKENS[0], evalFn, evalArgs)
    elif opts.player1 == 'human':
        p1 = agent.HumanAgent(game.Game.TOKENS[0])
    p2 = agent.RandomAgent(game.Game.TOKENS[1])

    if p1 is None:
        print("Please specify a legitimate player")
        import sys
        sys.exit(1)

    test([p1, p2], numGames=int(opts.numgames), draw=opts.draw)