def placeFigures(self, board: GameBoard, state: GameState) -> None:
    """Delegate the initial figure placement to a GreedyAgent.

    :param board: board of the game
    :param state: the current state
    """
    # TODO: find a better idea?
    delegate = GreedyAgent(self.team)
    delegate.placeFigures(board, state)
def placeFigures(self, board: GameBoard, state: GameState) -> None:
    """Place the initial figures by borrowing GreedyAgent's placer() method.

    :param board: board of the game
    :param state: the current state
    """
    # TODO: find a better placer
    placer = GreedyAgent(self.team)
    placer.placeFigures(board, state)
def pick_low_num_for_streak(self):
    """Once a suit streak is accumulating, the agent should spend its
    lowest-numbered card of that suit.

    NOTE(review): unittest only auto-discovers methods whose names start
    with ``test_`` — confirm this method actually runs, or rename it.
    """
    agent = GreedyAgent("greedy")
    agent.cards = [(2, "Fire"), (3, "Ice"), (4, "Water")]
    agent.pickCard()

    # A Water streak has started: expect the lowest Water card next.
    agent.accumulatedCards["Water"] += 1
    agent.cards.append((1, "Water"))
    agent.cards.append((0, "Water"))
    agent.pickCard()
    self.assertEqual(agent.playedCard, (1, "Water"))

    # Streak continues: the remaining lowest Water card should be played.
    agent.accumulatedCards["Water"] += 1
    agent.pickCard()
    self.assertEqual(agent.playedCard, (0, "Water"))
def start_demo_game(n_agents: int, game_duration: int, board_width: int, board_height: int, n_fruits: int, use_keyboard_listener: bool):
    """Run a demo game: one keyboard-controlled player against greedy bots.

    :param n_agents: total number of players (1 human + n_agents - 1 bots)
    :param game_duration: length of the game
    :param board_width: board width in cells
    :param board_height: board height in cells
    :param n_fruits: number of fruits on the board
    :param use_keyboard_listener: whether the human player uses a keyboard listener
    """
    bots = [GreedyAgent() for _ in range(n_agents - 1)]
    players = [KeyboardPlayer(use_keyboard_listener=use_keyboard_listener)] + bots
    # A live keyboard player needs real-time pacing, so fast_run is enabled
    # exactly when the listener is NOT in use.
    start_game_with_players(players, game_duration, board_width, board_height, n_fruits,
                            fast_run=not use_keyboard_listener)
def agent(self, team, seed) -> Agent:
    """Build the agent matching ``self.kind``.

    'gre' -> GreedyAgent, 'cls' -> ClassifierAgent, 'reg' -> RegressionAgent;
    any other kind falls back to a RandomAgent.

    :param team: the team the agent plays for
    :param seed: RNG seed forwarded to the agent
    """
    # Lazy factories so only the selected agent is ever constructed.
    factories = {
        'gre': lambda: GreedyAgent(team, seed=seed),
        'cls': lambda: ClassifierAgent(team, self.filename, seed=seed),
        'reg': lambda: RegressionAgent(team, self.filename, seed=seed),
    }
    make = factories.get(self.kind)
    if make is not None:
        return make()
    return RandomAgent(team)
def start_part_e(n_agents: int, game_duration: int, board_width: int, board_height: int, n_fruits: int, fast_run: bool, graphics_off: bool):
    """Part E: run one AlphaBetaAgent against a field of greedy opponents.

    :param n_agents: total number of players (1 alpha-beta + n_agents - 1 greedy)
    :param game_duration: length of the game
    :param board_width: board width in cells
    :param board_height: board height in cells
    :param n_fruits: number of fruits on the board
    :param fast_run: skip real-time pacing when True
    :param graphics_off: disable rendering when True
    """
    opponents = [GreedyAgent() for _ in range(n_agents - 1)]
    start_game_with_players([AlphaBetaAgent()] + opponents, game_duration,
                            board_width, board_height, n_fruits,
                            fast_run=fast_run, graphics_off=graphics_off)
def get_player(p: str):
    """Instantiate the player type named by ``p``.

    Returns None for an unrecognized name (same as the original if/elif
    chain falling through). ``use_keyboard_listener`` is read from the
    enclosing scope.
    """
    # Lazy factories so only the requested player is constructed.
    factories = {
        'KeyboardPlayer': lambda: KeyboardPlayer(use_keyboard_listener=use_keyboard_listener),
        'GreedyAgent': lambda: GreedyAgent(),
        'BetterGreedyAgent': lambda: BetterGreedyAgent(),
        'MinimaxAgent': lambda: MinimaxAgent(),
        'AlphaBetaAgent': lambda: AlphaBetaAgent(),
        'TournamentAgent': lambda: TournamentAgent(),
    }
    factory = factories.get(p)
    return factory() if factory is not None else None
def parse_agents(self):
    """Replace the raw agent-name list in ``self.agents`` with a dict
    mapping each name to an instantiated agent object.

    Recognized names: "closestcoin", "density", "greedy"; unrecognized
    names are silently dropped from the dict (in the code visible here).
    """
    agentsTemp = self.agents
    self.agents = {}
    # iterate through agents
    for agent in agentsTemp:
        # pick a random spawn location (grid-aligned to 40-pixel cells)
        # NOTE(review): x and y are computed but never used in the code
        # visible in this chunk — confirm whether a later branch consumes
        # them, otherwise they are dead locals.
        x = random.randrange(0, self.windowWidth, 40)
        y = random.randrange(0, self.windowHeight, 40)
        # initialize agent
        if agent == "closestcoin":
            self.agents[agent] = ClosestCoinAgent(8, 2, self.coins)
            continue
        if agent == "density":
            self.agents[agent] = DensityAgent(9, 2, self.coins)
            continue
        if agent == "greedy":
            self.agents[agent] = GreedyAgent(10, 2, self.coins)
            continue
# Score accumulators: one row per episode, one column per game step.
ucb_agent_score = np.zeros((Episodes, GameSteps), np.float32)
best_agent_score = np.zeros((Episodes, GameSteps), np.float32)

# Average Best Action — per-(episode, step) accumulators for how often each
# strategy selects the best arm.
random_agent_best_prop = np.zeros((Episodes, GameSteps), np.float32)
greedy_agent_best_prop = np.zeros((Episodes, GameSteps), np.float32)
egreedy_agent_best_prop = np.zeros((Episodes, GameSteps), np.float32)
egreedy_agent2_best_prop = np.zeros((Episodes, GameSteps), np.float32)
ogreedy_agent_best_prop = np.zeros((Episodes, GameSteps), np.float32)
ucb_agent_best_prop = np.zeros((Episodes, GameSteps), np.float32)

for episode in range(Episodes):
    # Fresh bandit environment per episode, seeded with the episode index.
    env = MultiArm_Bandit(N, episode)
    best_action = np.argmax(env.bandit_config)
    # One agent per strategy; the two EGreedy agents differ only in epsilon.
    random_agent = RandomAgent(N)
    greedy_agent = GreedyAgent(N)
    egreedy_agent = EGreedyAgent(N, 0.1)
    egreedy_agent2 = EGreedyAgent(N, 0.01)
    ogreedy_agent = OptimisticGreedyAgent(N)
    ucb_agent = UpperBoundAgent(N, 2)
    # Warm-up phase: feed every agent the same random pulls so their value
    # estimates start from identical observations.
    for i in range(ExploreSteps):
        action = np.random.choice(N)
        reward = env.step(action)
        random_agent.update_estimation(action, reward)
        greedy_agent.update_estimation(action, reward)
        egreedy_agent.update_estimation(action, reward)
        egreedy_agent2.update_estimation(action, reward)
        ogreedy_agent.update_estimation(action, reward)
        ucb_agent.update_estimation(action, reward)
p1.accumulatedCards[p1.playedCard[1]] += 1 if roundWinner == p2: p2.accumulatedCards[p2.playedCard[1]] += 1 if gameState.judgeGameOver(p1, p2) == p1: return p1.name elif gameState.judgeGameOver(p1, p2) == p2: return p2.name p1.cards.append(d1.generateRandomCard()) p2.cards.append(d1.generateRandomCard()) if __name__ == "__main__": now = datetime.now() dateString = now.strftime("%d-%m-%Y %HH %MM %SS.txt") sys.setrecursionlimit(10000000) # p1 = ApproximateQLearningAgent("aqlearn") p2 = GreedyAgent("Greedy") p1 = RandomAgent("random") games = 10000 wins = 0 for i in range(0, games): if runGame(p1, p2) == "Greedy": wins += 1 print(p2.name + " won", str(wins), "out of", games, "games")
seed) blue = RegressionMultiAgent(BLUE, 'models/Junction_blue_attack.joblib', 'models/Junction_blue_move.joblib', 'models/Junction_blue_pass.joblib', seed) # agents that use classifiers or regressors just need one model red = ClassifierAgent(RED, 'models/Junction_cls_red.joblib', seed=seed) blue = ClassifierAgent(BLUE, 'models/Junction_cls_blue.joblib', seed=seed) red = RegressionAgent(RED, 'models/Junction_reg_red.joblib', seed=seed) blue = RegressionAgent(BLUE, 'models/Junction_reg_blue.joblib', seed=seed) # greedy agents instead don't require models red = GreedyAgent(RED, seed=seed) blue = GreedyAgent(BLUE, seed=seed) # different agents can have different set of parameters red = AlphaBetaFast1Agent(RED, maxDepth=3) blue = AlphaBetaFast1Agent(BLUE, maxDepth=3) # the MatchManager is the object that is in charge of control the evolution of a game mm = MatchManager('', red, blue, board, state, seed=seed) # there is a dedicated method to play the full game mm.play() # at the end it is possible to collect some information from the MatchManager object, like the winner logger.info('winner: ', mm.winner)
def pick_random(self):
    """Fixture for a hand whose three cards share the same number across
    all three suits (so any greedy tie-break must choose among equals).

    NOTE(review): the visible body only builds the fixture — no pickCard()
    call or assertion appears in this chunk; confirm the rest of the test
    exists. Also confirm the name should start with ``test_`` for unittest
    discovery.
    """
    greedyAgent = GreedyAgent("greedy")
    greedyAgent.cards = [(3, "Fire"), (3, "Ice"), (3, "Water")]
def pick_highest_num_test(self):
    """With no streak in play, the greedy agent should play the card with
    the highest number.

    NOTE(review): unittest discovers methods named ``test_*``; this one is
    named ``*_test`` — confirm it actually runs.
    """
    agent = GreedyAgent("greedy")
    agent.cards = [(2, "Fire"), (3, "Ice"), (4, "Water")]
    agent.pickCard()
    self.assertEqual(agent.playedCard, (4, "Water"))
enabled = {"greedy": True, "random": False, "multib": False, "thomp": False} # learnrates = [0.01] #[0.05, 0.04, 0.03, 0.02, 0.01, 0.005] # regulizers = [1e-3] #[0.01, 0.005, 0.001, 0.0005, 0.0001] n_exp = 1 # priors = ThompsonLogisticAgent.parse_priors([os.path.join('agents', file) for file in os.listdir('agents') if 'thomp(0.0100,0.0010)' in file]) if __name__ == "__main__": now = time.time() experiments = [] for runid in range(10001, 10011): # runid = random.choice(range(10000)) str_runid = str(runid).zfill(4) # Greedy agent if enabled["greedy"]: greedy_name = "greedy_runid_" + str(runid).zfill(4) greedy_agent = GreedyAgent(greedy_name) exp_greedy = Experiment(greedy_agent, greedy_name, run_idx=[runid]) experiments.append(exp_greedy) exp_greedy.start() # Random agent # if enabled["random"]: # random_name = "random_runid_" + str(runid).zfill(4) # random_agent = RandomAgent(random_name) # exp_random = Experiment(random_agent, random_name, run_idx=[runid]) # exp_random.start() # Multi beta agent # if enabled["multib"]: # multib_name = "multibeta_runid_" + str(runid).zfill(4) # multib_agent = MultiBetaAgent(multib_name) # exp_multib = Experiment(multib_agent, multib_name, run_idx=[runid]) # experiments.append(exp_multib)
def start_part_g(n_agents: int, game_duration: int, board_width: int, board_height: int, n_fruits: int, fast_run: bool, graphics_off: bool):
    """Part G experiment: benchmark four agent line-ups and write a CSV report.

    Each configuration is played 10 times; total snake length and move time
    are accumulated into single-element lists filled in-place by
    ``start_game_with_players``, then averaged per game.

    :param n_agents: number of players per game
    :param game_duration: length of each game
    :param board_width: board width in cells
    :param board_height: board height in cells
    :param n_fruits: number of fruits on the board
    :param fast_run: skip real-time pacing when True
    :param graphics_off: disable rendering when True
    """
    # 1) All-greedy baseline.
    length_1 = [0]
    time_1 = [0]
    for i in range(10):
        players = [GreedyAgent() for _ in range(n_agents)]
        start_game_with_players(players, game_duration, board_width, board_height, n_fruits,
                                fast_run=fast_run, graphics_off=graphics_off,
                                length=length_1, time=time_1)
    print(length_1[0] / 10, time_1[0] / 10)

    # 2) One BetterGreedyAgent versus greedy opponents.
    length_2 = [0]
    time_2 = [0]
    for i in range(10):
        players = [BetterGreedyAgent()] + [GreedyAgent() for _ in range(n_agents - 1)]
        start_game_with_players(players, game_duration, board_width, board_height, n_fruits,
                                fast_run=fast_run, graphics_off=graphics_off,
                                length=length_2, time=time_2)
    print(length_2[0] / 10, time_2[0] / 10)

    # 3) MinimaxAgent at depths 2, 4, 6; int(depth / 2 - 1) maps the depth
    #    to result slot 0, 1, 2.
    length_3 = [[0], [0], [0]]
    time_3 = [[0], [0], [0]]
    for depth in [2, 4, 6]:
        for i in range(10):
            print(depth)
            players = [MinimaxAgent()] + [GreedyAgent() for _ in range(n_agents - 1)]
            start_game_with_players(players, game_duration, board_width, board_height, n_fruits,
                                    fast_run=fast_run, graphics_off=graphics_off, depth=depth,
                                    length=length_3[int(depth / 2 - 1)],
                                    time=time_3[int(depth / 2 - 1)])
    print(length_3[1][0] / 10, time_3[1][0] / 10)

    # 4) AlphaBetaAgent at the same depths.
    length_4 = [[0], [0], [0]]
    time_4 = [[0], [0], [0]]
    for depth in [2, 4, 6]:
        for i in range(10):
            players = [AlphaBetaAgent()] + [GreedyAgent() for _ in range(n_agents - 1)]
            start_game_with_players(players, game_duration, board_width, board_height, n_fruits,
                                    fast_run=fast_run, graphics_off=graphics_off, depth=depth,
                                    length=length_4[int(depth / 2 - 1)],
                                    time=time_4[int(depth / 2 - 1)])

    # newline='' is required by the csv module to avoid blank rows on Windows.
    with open('experiment.csv', 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['GreedyAgent', length_1[0] / 10, time_1[0] / 10])
        writer.writerow(['betterAgent', length_2[0] / 10, time_2[0] / 10])
        for i in range(3):
            # BUG FIX: slot i holds results for depth 2 * (i + 1) (depths
            # 2, 4, 6); the original labeled them 2 * i (0, 2, 4).
            writer.writerow(['MinMaxAgent', 2 * (i + 1), length_3[i][0] / 10, time_3[i][0] / 10])
        for i in range(3):
            writer.writerow(['AlphaBetaAgent', 2 * (i + 1), length_4[i][0] / 10, time_4[i][0] / 10])
import sys from othello import Othello from agents import MinimaxAgent, GreedyAgent, AlphaBetaAgent import numpy as np import time print('Welcome to Reversi!') epoch = 0 maxEpoch = 20 game = Othello() game.resetBoard() tile1, tile2 = 'O', 'X' agent1 = GreedyAgent(tile1, game) # agent2 = GreedyAgent(tile2, game) # agent1 = MinimaxAgent(tile1, game, 2) # agent2 = MinimaxAgent(tile2, game, 2) # agent1 = AlphaBetaAgent(tile1, game, 4) agent2 = AlphaBetaAgent(tile2, game, 4) render = False score1 = [] score2 = [] start = time.time() #Start a new round while True: game.resetBoard() roundStartTurn = 'player' while True: if roundStartTurn == 'computer':