def run_game(self, agent_class1, params1, agent_class2, params2, game_params):
    '''
    @param agent_class1: white agent class
    '''
    self.games_num += 1
    agent1 = agent_class1()
    agent2 = agent_class2()
    # initialize agents that support myinit
    for agent, params in [(agent1, params1), (agent2, params2)]:
        if isinstance(agent, AnytimeSmartAlphaBetaPrintAgentParams):
            agent.myinit(**params)
    agents = {WHITE: agent1, BLACK: agent2}
    state = LinesOfActionState(game_params['size'], game_params['turns_left'])
    game_runner = GameRunner(state, agents,
                             game_params['turn_time_limit'],
                             game_params['setup_time_limit'])
    try:
        winner = game_runner.run()
        print 'Winner:', winner
    except Exception, e:
        print e
        print "Assuming winner is Tie"
        winner = TIE
        # note: the exception is re-raised, so the TIE assignment only matters
        # to callers that catch it further up the stack
        raise
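# A minimal usage sketch for run_game. The harness instance name `tester` and
# the myinit parameter key 'print_decisions' are assumptions for illustration;
# the agent classes and game_params keys come from the surrounding project.
#
#   game_params = {'size': 8, 'turns_left': 100,
#                  'turn_time_limit': 5, 'setup_time_limit': 1}
#   tester.run_game(AnytimeSmartAlphaBetaPrintAgentParams, {'print_decisions': True},
#                   LoaDEAgent, {}, game_params)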
def run_game_with_game_runner(seed, players, random_generator=RandomGenerator()):
    # Redirect stdout so everything the game prints is captured and returned.
    original_stdout = sys.stdout
    try:
        output = StringIO()
        sys.stdout = output
        game_runner = GameRunner(seed, players, random_generator)
        game_runner.run()
        game_runner_output = output.getvalue()
    finally:
        sys.stdout = original_stdout
    return game_runner_output
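# Hedged usage sketch: the player class below is an assumption, since the
# concrete player implementations and RandomGenerator come from the project.
#
#   transcript = run_game_with_game_runner(42, [SomePlayer(), SomePlayer()])
#   print(transcript)  # inspect the captured console output of the game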
def main(): """The entry point.""" if True: trainer = AITrainer() trainer.train() else: agent = DQNAgent() agent.model.load_weights(FILENAME) player = DQNPlayer(agent, 0, REREQUEST_TRESHOLD) while True: game_runner = GameRunner(ConsoleGamePresenter(), player, ConsoleGamePlayer()) game_runner.start_game() time.sleep(2)
def cache_size_hit_dict_test():
    log_name = create_new_file_name('.'.join(['cache_size_hit_dict_test', str(date.today()), 'data']))
    log = open(log_name, 'w')
    d = {}
    agents = {}
    regular = LoaDEAgent()
    opt = LoaDEAgent()
    regular.pre_pre_pre_setup((False, False), 0)
    opt.pre_pre_pre_setup((False, True), 10000)
    state = LinesOfActionState(8, 100000)
    for cache_time in range(1, 10, 2):
        hit = 0
        miss = 0
        agents[WHITE] = opt
        agents[BLACK] = regular
        GameRunner(state, agents, cache_time, 1).run()
        hit += opt.alphaBetaAnyTime.cache_hit
        miss += opt.alphaBetaAnyTime.cache_miss
        d[cache_time] = (hit, miss, hit + miss, float(hit) / (hit + miss))
        print '(hit, miss): ', (hit, miss, hit + miss, float(hit) / (hit + miss))
    msg('cache_size_hit_dict = ' + str(d), log)
    print 'cache_size_hit_dict_test done! see results in log'
def cache_size_dict_test():
    log_name = create_new_file_name('.'.join(['cache_size_dict_test', str(date.today()), 'data']))
    log = open(log_name, 'w')
    d = {}
    agents = {}
    regular = LoaDEAgent()
    opt = LoaDEAgent()
    regular.pre_pre_pre_setup((False, False), 0)
    for cache_size in [10, 30, 100, 300, 1000, 3000, 10000, 30000, 100000]:
        total = 0
        opt.pre_pre_pre_setup((False, True), cache_size)
        state = LinesOfActionState(8, 100000)
        agents[WHITE] = opt
        agents[BLACK] = regular
        winner = GameRunner(state, agents, 5, 1).run()
        if winner == WHITE:
            total += 1
        winner = GameRunner(state, agents, 5, 1).run()
        if winner == WHITE:
            total += 1
        agents[BLACK] = opt
        agents[WHITE] = regular
        winner = GameRunner(state, agents, 5, 1).run()
        if winner == BLACK:
            total += 1
        winner = GameRunner(state, agents, 5, 1).run()
        if winner == BLACK:
            total += 1
        d[cache_size] = total
        print 'total: ', total
    msg('cache_size_dict = ' + str(d), log)
    print 'cache_size_dict_test done! see results in log'
def reordering__borad_size_dict_test():
    log_name = create_new_file_name('.'.join(['reordering__borad_size_dict_test', str(date.today()), 'data']))
    log = open(log_name, 'w')
    d = {}
    agents = {}
    regular = LoaDEAgent()
    opt = LoaDEAgent()
    regular.pre_pre_pre_setup((False, False), 0)
    opt.pre_pre_pre_setup((True, False), 0)
    for board_size in range(8, 12):
        total = 0
        state = LinesOfActionState(board_size, 100000)
        agents[WHITE] = opt
        agents[BLACK] = regular
        winner = GameRunner(state, agents, 3, 1).run()
        if winner == WHITE:
            total += 1
        winner = GameRunner(state, agents, 3, 1).run()
        if winner == WHITE:
            total += 1
        agents[BLACK] = opt
        agents[WHITE] = regular
        winner = GameRunner(state, agents, 3, 1).run()
        if winner == BLACK:
            total += 1
        winner = GameRunner(state, agents, 3, 1).run()
        if winner == BLACK:
            total += 1
        d[board_size] = total
        print 'total: ', total
    msg('reordering__borad_size_dict = ' + str(d), log)
    print 'reordering__borad_size_dict_test done! see results in log'
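# The two tests above repeat the same "two games as white, two games as black"
# block. A minimal refactoring sketch of that pattern; the helper name
# play_four_games is an assumption and not part of the original code, while
# WHITE, BLACK and GameRunner are the project's own names.
def play_four_games(opt, regular, state, turn_time_limit, setup_time_limit=1):
    # Play two games with `opt` as white and two with `opt` as black,
    # mirroring the loop bodies above; returns opt's win count (0..4).
    wins = 0
    for opt_color, other_color in [(WHITE, BLACK), (BLACK, WHITE)]:
        agents = {opt_color: opt, other_color: regular}
        for _ in range(2):
            winner = GameRunner(state, agents, turn_time_limit, setup_time_limit).run()
            if winner == opt_color:
                wins += 1
    return wins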
def __init__(self, stdscr):
    self.contents_pad = curses.newpad(self.PAD_HEIGHT, self.PAD_WIDTH)
    self.refresh_pad()
    self.game_runner = GameRunner()
    self.history_pos = 0
    self.stdscr = stdscr
    self.stdscr.nodelay(True)
    self.run()
def main():
    parser = argparse.ArgumentParser(description='Deep Q Learning')
    parser.add_argument('-m', '--model-dir', required=False, default=None,
                        help='Path to a new or existing model directory.')
    parser.add_argument(
        '-t', '--train-model', required=False,
        default=False,  # False so the flag actually toggles training vs. evaluation
        action='store_true',
        help='If true, model will be trained. Otherwise, it will be evaluated.')
    parser.add_argument(
        '-d', '--default-config', required=False,
        default=os.path.join('default_configs', 'CartPole-v0.yaml'),
        help='The default config to use when creating a new model.')
    args = parser.parse_args()

    with tf.Session() as session:
        game_runner = GameRunner(session, args.default_config, args.model_dir)
        if args.train_model:
            game_runner.train()
        else:
            game_runner.evaluation()
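# Hedged usage sketch for the CLI above (the script name is an assumption):
#
#   python dqn_main.py --model-dir models/cartpole --train-model   # train
#   python dqn_main.py --model-dir models/cartpole                 # evaluate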
def gen_stats(n_runs):
    wins = [0, 0]
    lengths = []
    for i in range(0, n_runs):
        gr = GameRunner()
        gr.play()
        wins[gr.game().winner] = wins[gr.game().winner] + 1
        lengths.append(gr.count)
    print(wins)
def run_simulation(self):
    if self.execution_stopped:
        log.warning("Can't train: the execution has been stopped.")
        return

    # Set our own signal handler for SIGINT
    signal.signal(signal.SIGINT, self.__signal_handler)
    self.main_pid = os.getpid()

    global_start_time = time.time()
    with GameRunner() as executor:
        # with SnakeGameExecutor(self.args) as executor:
        batch = self.initial_batch()
        while len(batch) > 0:
            start_time = time.time()
            batch_size = len(batch)
            log.info('Running new batch: %d jobs.', batch_size)
            self.run_counter += batch_size
            result_generator = executor.run_batch(batch)
            if result_generator:
                batch = self.create_batch_from_results(result_generator)
                batch_duration = time.time() - start_time
                log.info(
                    'Batch: %d simulations in %g sec:\n => %g sim/sec',
                    batch_size, batch_duration, batch_size / batch_duration)
            else:
                time.sleep(2)
            if self.execution_stopped:
                self.training_interrupted()
                break

    simulation_duration = time.time() - global_start_time
    log.info('Ran %d simulations in %g sec.', self.run_counter, simulation_duration)

    # Unregister the signal handler
    signal.signal(signal.SIGINT, signal.SIG_DFL)
def train(self):
    with tf.Session() as sess:
        sess.run(self._model.var_init)
        game_runner = GameRunner(sess, self._model, self._env, self._memory,
                                 max_eps=1.0, min_eps=1e-1, decay=1e-4, gamma=0.97)
        game_runner.reset()
        i = 0
        while True:
            if not self._headless:
                events = pygame.event.get()
                for event in events:
                    if event.type == pygame.KEYDOWN:
                        if event.key == pygame.K_ESCAPE:
                            self.exit()
                            break
                        elif event.key == pygame.K_s:
                            self._last_model_idx += 1
                            self.save_model(sess, self._last_model_idx)
                        elif event.key == pygame.K_l:
                            self.load_model(sess, self._last_model_idx)
                        elif event.key == pygame.K_d:
                            self._render = not self._render
                        elif event.key == pygame.K_PAGEUP:
                            self._clock_tick += self._clock_tick_min
                            self._clock_tick = self._clock_tick if self._clock_tick <= self._clock_tick_max else self._clock_tick_max
                            print(f'\nNew clock tick: {self._clock_tick}')
                        elif event.key == pygame.K_PAGEDOWN:
                            self._clock_tick -= self._clock_tick_min
                            self._clock_tick = self._clock_tick if self._clock_tick >= self._clock_tick_min else self._clock_tick_min
                            print(f'\nNew clock tick: {self._clock_tick}')
                        elif event.key == pygame.K_r:
                            self._focus_high_scores = not self._focus_high_scores

            gr_latest = game_runner.update(self._render, self._clock_tick)
            if not self._render and self._focus_high_scores and gr_latest.current_score == gr_latest.highest_score - 1:
                self._render = True
            if gr_latest.game_complete:
                i += 1
                game_runner.reset()
                if self._render and self._focus_high_scores:
                    self._render = False
                print(f'Epochs:{i + 1:,} | Current score:{gr_latest.current_score:,} | '
                      f'Highest score:{gr_latest.highest_score:,} | '
                      f'Current time:{gr_latest.current_time:,.2f}s | '
                      f'Average score:{gr_latest.average_score:,.2f} | '
                      f'Average reward:{gr_latest.average_reward:,.2f} | '
                      f'Average time:{gr_latest.average_time:,.2f}s | '
                      f'Current ε:{gr_latest.current_eps:,.5f} ', end='\r')
def train(self):
    group_score_plot = []
    game_group_indices_for_a_plot = []
    game_indices_for_a_plot = []
    rerequest_plot = []
    group_rerequest_plot = []
    counter_games = 0
    group_size = 10
    group_score = 0
    group_rerequest_score = 0

    while counter_games < TOTAL_GAMES:
        # Collecting data for group plots.
        if counter_games % group_size == 0:
            game_group_indices_for_a_plot.append(counter_games)
            group_score_plot.append(group_score)
            group_score = 0
            group_rerequest_plot.append(group_rerequest_score)
            group_rerequest_score = 0

        # Initialize classes.
        # The epsilon argument is set to give randomness to actions.
        self.player = DQNPlayer(self.agent, EPSILON_START - counter_games, REREQUEST_TRESHOLD)

        # Configure a game (DQN_AI vs Random) without any presentation.
        game_runner = GameRunner(game_presenter=None,
                                 player1=self.player,
                                 player2=RandomPlayer())
        self.state_old = self.agent.get_state(game_runner.game)

        # Configure the DQN-learning function to run after each game turn.
        game_runner.on_mark_selected = self.on_mark_selected_callback

        # Run the game.
        game_runner.start_game()

        # Fit the DQN on the new data.
        self.agent.replay_new(self.agent.memory)

        score = self.get_score(game_runner.game, self.player)
        group_score += score
        rerequest_plot.append(self.player.total_rerequest_count)
        group_rerequest_score += self.player.total_rerequest_count
        game_indices_for_a_plot.append(counter_games)
        counter_games += 1
        print(f'{counter_games}/{TOTAL_GAMES}')

    # Save model weights for future use.
    self.agent.model.save_weights(FILENAME)

    # Prepare learning plots.
    sns.set(color_codes=True)

    # Plot 1:
    # - score accumulated for each group_size games.
    # - normalized cell rerequest rate for each group_size games.
    plt.figure(1)
    ax = sns.regplot(np.array([game_group_indices_for_a_plot])[0],
                     np.array([group_score_plot])[0],
                     color="b", x_jitter=.1, line_kws={'color': 'green'})
    ax.set(xlabel='games', ylabel='score')
    group_rerequest_plot = [x / max(group_rerequest_plot) for x in group_rerequest_plot]
    ax = sns.regplot(np.array([game_group_indices_for_a_plot])[0],
                     np.array([group_rerequest_plot])[0],
                     color="r", x_jitter=.1, line_kws={'color': 'blue'})
    ax.set(xlabel='games', ylabel='rerequest')

    # Plot 2: plain rerequest count plot.
    plt.figure(2)
    bx = sns.regplot(np.array([game_indices_for_a_plot])[0],
                     np.array([rerequest_plot])[0],
                     color="b", x_jitter=.1, line_kws={'color': 'green'})
    bx.set(xlabel='games', ylabel='rerequest_count')

    # Show plots.
    plt.show()
def move(self, game_state):
    print game_state
    inp = raw_input(self.player + ' turn. Enter <row col action> (action is N/NE/E/SE/S/SW/W/NW/SPIN): ')
    row, col, action = inp.split()
    row = int(row)
    col = int(col)
    action = action.upper()
    res = []
    if action == 'SPIN':
        res = SpinAction(row, col)
    else:
        direction_idx = DIRECTIONS.index(Direction(action, (0, 0)))
        res = MoveAction(row, col, DIRECTIONS[direction_idx])
    return res

def setup(self, player, game_state, turn_time_limit, setup_time_limit):
    self.player = player
    agents = {}
    agents[WHITE] = AlphaBetaAgent()
    agents[BLACK] = DummyAgent()
    state = LinesOfActionState(6, 50)
    winner = GameRunner(state, agents, 2, 1).run()
    print 'Winner:', winner
max_rewards = []
mean_rewards = []
median_rewards = []
for play_threshold in PLAYS:
    max_reward = []
    mean_reward = []
    median_reward = []
    for base_discard in DISCARDS:
        agents = [GreedyAgent(config) for i in range(2)]
        for agent in agents:
            agent.play_threshold = play_threshold
            agent.discard_thresholds = [
                base_discard + i * (play_threshold - base_discard) / 9
                for i in range(9)
            ]
        runner = GameRunner(agents, EPISODES, config)
        runner.run()
        rewards = runner.rewards
        max_reward.append(np.max(rewards))
        mean_reward.append(np.mean(rewards))
        median_reward.append(np.median(rewards))
    max_rewards.append(max_reward)
    mean_rewards.append(mean_reward)
    median_rewards.append(median_reward)

# Plot 3D data
fig = plt.figure()
X = np.array([[PLAYS[i] for j in range(len(DISCARDS))] for i in range(len(PLAYS))])
Y = np.array([DISCARDS[:] for i in range(len(PLAYS))])
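# The snippet above stops after building the X/Y grids. A hedged completion
# sketch of the 3D plot, assuming Axes3D from mpl_toolkits and using
# mean_rewards as the Z surface (the choice of Z and the axis labels are
# assumptions, not part of the original code):
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (registers the '3d' projection)

ax = fig.add_subplot(111, projection='3d')
Z = np.array(mean_rewards)  # shape: (len(PLAYS), len(DISCARDS)), matching X and Y
ax.plot_surface(X, Y, Z)
ax.set_xlabel('play threshold')
ax.set_ylabel('base discard threshold')
ax.set_zlabel('mean reward')
plt.show()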
def main(): """The entry point.""" game_runner = GameRunner(ConsoleGamePresenter(), ConsoleGamePlayer(), ConsoleGamePlayer()) game_runner.start_game()
def reset(self):
    self.game_runner = GameRunner()
    self.history_pos = self.game_runner.count
import gym

import environments
from agents.agent_q import AgentQ
from game_runner import GameRunner

# Make a gym
env = gym.make('LetterNoose-v0')
env.reset()

# Make an agent
agent = AgentQ(env, 27**3)

# Run games
game = GameRunner(env, agent)
game.trainEpisodes(300)

# View stats
game.printTrainStats()