Example #1
    def run_game(self, agent_class1, params1, agent_class2, params2, game_params):
        '''
        @param agent_class1: white agent class
        @param params1: init parameters for the white agent
        @param agent_class2: black agent class
        @param params2: init parameters for the black agent
        @param game_params: dict with 'size', 'turns_left', 'turn_time_limit' and 'setup_time_limit'
        '''
        self.games_num += 1
        agent1 = agent_class1()
        agent2 = agent_class2()
        # initialize agents that support myinit
        for agent, params in [(agent1, params1), (agent2, params2)]:
            if isinstance(agent, AnytimeSmartAlphaBetaPrintAgentParams):
                agent.myinit(**params)

        agents = {WHITE: agent1, BLACK: agent2}
        state = LinesOfActionState(game_params['size'], game_params['turns_left'])
        game_runner = GameRunner(state, agents, game_params['turn_time_limit'],
                                 game_params['setup_time_limit'])

        try:
            winner = game_runner.run()
            print 'Winner:', winner

        except Exception, e:
            print e
            print "Assuming winner is Tie"
            winner = TIE
            raise
Example #2
def run_game_with_game_runner(seed, players, random_generator=RandomGenerator()):
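    """Run a game through GameRunner while redirecting stdout, and return the captured output."""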
    original_stdout = sys.stdout
    try:
        output = StringIO()
        sys.stdout = output
        game_runner = GameRunner(seed, players, random_generator)
        game_runner.run()
        game_runner_output = output.getvalue()
    finally:
        sys.stdout = original_stdout
    return game_runner_output
Example #3
def main():
    """The entry point."""
    if True:
        trainer = AITrainer()
        trainer.train()
    else:
        agent = DQNAgent()
        agent.model.load_weights(FILENAME)
        player = DQNPlayer(agent, 0, REREQUEST_TRESHOLD)
        while True:
            game_runner = GameRunner(ConsoleGamePresenter(), player,
                                     ConsoleGamePlayer())
            game_runner.start_game()
            time.sleep(2)
Example #4
def cache_size_hit_dict_test():
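    """For several values of cache_time, play a game and log the caching agent's cache hit/miss counts."""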
    log_name = create_new_file_name('.'.join(['cache_size_hit_dict_test', str(date.today()), 'data']))
    log = open(log_name, 'w')
    d = {}
    agents = {}
    regular = LoaDEAgent()
    opt = LoaDEAgent()
    
    regular.pre_pre_pre_setup((False, False), 0)
    opt.pre_pre_pre_setup((False, True), 10000)
    state = LinesOfActionState(8, 100000)
    
    for cache_time in range(1,10,2):
        hit = 0
        miss = 0
        
        agents[WHITE] = opt
        agents[BLACK] = regular
        
        GameRunner(state, agents, cache_time, 1).run()
        hit += opt.alphaBetaAnyTime.cache_hit
        miss += opt.alphaBetaAnyTime.cache_miss
                    
        d[cache_time] = (hit, miss, hit+miss, float(hit)/(hit+miss))
        
        print '(hit, miss, total, hit rate): ', (hit, miss, hit+miss, float(hit)/(hit+miss))

    msg('cache_size_hit_dict = ' + str(d), log)  
    print 'cache_size_hit_dict_test done! see results in log'    
Example #5
def cache_size_dict_test():
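    """For each cache size, play four games (two per colour) against a non-caching agent and log the caching agent's wins."""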
    log_name = create_new_file_name('.'.join(['cache_size_dict_test', str(date.today()), 'data']))
    log = open(log_name, 'w')
    d = {}
    agents = {}
    regular = LoaDEAgent()
    opt = LoaDEAgent()
    
    regular.pre_pre_pre_setup((False, False), 0)
    
    
    for cache_size in [10, 30, 100, 300, 1000, 3000, 10000, 30000, 100000]:
        total = 0
        opt.pre_pre_pre_setup((False, True), cache_size)
        state = LinesOfActionState(8, 100000)
        
        agents[WHITE] = opt
        agents[BLACK] = regular
        
        winner = GameRunner(state, agents, 5, 1).run()
        if winner == WHITE:
            total += 1
        
        winner = GameRunner(state, agents, 5, 1).run()
        if winner == WHITE:
            total += 1
        
        agents[BLACK] = opt
        agents[WHITE] = regular
        
        winner = GameRunner(state, agents, 5, 1).run()
        if winner == BLACK:
            total += 1
        
        winner = GameRunner(state, agents, 5, 1).run()
        if winner == BLACK:
            total += 1
            
        d[cache_size] = total
        
        print 'total: ', total

    msg('cache_size_dict = ' + str(d), log)  
    print 'cache_size_dict_test done! see results in log'    
Example #6
def reordering__borad_size_dict_test():
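    """For board sizes 8-11, play four games per size and log how often the move-reordering agent beats the baseline."""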
    log_name = create_new_file_name('.'.join(['reordering__borad_size_dict_test', str(date.today()), 'data']))
    log = open(log_name, 'w')
    d = {}
    agents = {}
    regular = LoaDEAgent()
    opt = LoaDEAgent()
    
    regular.pre_pre_pre_setup((False, False), 0)
    opt.pre_pre_pre_setup((True, False), 0)
    
    for board_size in range(8,12):
        total = 0
        state = LinesOfActionState(board_size, 100000)
        
        agents[WHITE] = opt
        agents[BLACK] = regular
        
        winner = GameRunner(state, agents, 3, 1).run()
        if winner == WHITE:
            total += 1
        
        winner = GameRunner(state, agents, 3, 1).run()
        if winner == WHITE:
            total += 1
        
        agents[BLACK] = opt
        agents[WHITE] = regular
        
        winner = GameRunner(state, agents, 3, 1).run()
        if winner == BLACK:
            total += 1
        
        winner = GameRunner(state, agents, 3, 1).run()
        if winner == BLACK:
            total += 1
            
        d[board_size] = total
        
        print 'total: ', total

    msg('reordering__borad_size_dict = ' + str(d), log)  
    print 'reordering__borad_size_dict_test done! see results in log'    
Example #7
    def __init__(self, stdscr):
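        """Set up the curses pad and a new GameRunner, then start the interactive loop."""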
        self.contents_pad = curses.newpad(self.PAD_HEIGHT, self.PAD_WIDTH)
        self.refresh_pad()

        self.game_runner = GameRunner()
        self.history_pos = 0

        self.stdscr = stdscr
        self.stdscr.nodelay(True)

        self.run()
Example #8
def main():
    parser = argparse.ArgumentParser(description='Deep Q Learning')

    parser.add_argument('-m',
                        '--model-dir',
                        required=False,
                        default=None,
                        help='Path to a new or existing model directory.')

    parser.add_argument(
        '-t',
        '--train-model',
        required=False,
        default=False,
        action='store_true',
        help='If set, the model will be trained; otherwise it will be evaluated.'
    )

    parser.add_argument(
        '-d',
        '--default-config',
        required=False,
        default=os.path.join('default_configs', 'CartPole-v0.yaml'),
        help='The default config to use when creating a new model.')

    args = parser.parse_args()

    with tf.Session() as session:
        game_runner = GameRunner(session, args.default_config, args.model_dir)

        if args.train_model:
            game_runner.train()
        else:
            game_runner.evaluation()
Example #9
def gen_stats(n_runs):
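    """Play n_runs games, tallying wins per player and recording each game's length."""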
    wins = [0, 0]
    lengths = []
    for i in range(0, n_runs):
        gr = GameRunner()
        gr.play()
        wins[gr.game().winner] = wins[gr.game().winner] + 1
        lengths.append(gr.count)
    print(wins)
Example #10
    def run_simulation(self):
        if self.execution_stopped:
            log.warning("Can't train: the execution has been stopped.")
            return

        # Set our own signal handler for SIGINT
        signal.signal(signal.SIGINT, self.__signal_handler)
        self.main_pid = os.getpid()

        global_start_time = time.time()
        with GameRunner() as executor:
            # with SnakeGameExecutor(self.args) as executor:
            batch = self.initial_batch()

            while len(batch) > 0:
                start_time = time.time()
                batch_size = len(batch)

                log.info('Running new batch: %d jobs.', batch_size)
                self.run_counter += batch_size

                result_generator = executor.run_batch(batch)

                if result_generator:
                    batch = self.create_batch_from_results(result_generator)

                    batch_duration = time.time() - start_time
                    log.info(
                        'Batch: %d simulations in %g sec:\n => %g sim/sec',
                        batch_size, batch_duration,
                        batch_size / batch_duration)
                else:
                    time.sleep(2)

                if self.execution_stopped:
                    self.training_interrupted()
                    break

        simulation_duration = time.time() - global_start_time
        log.info('Ran %d simulations in %g sec.', self.run_counter,
                 simulation_duration)

        # Unregister the signal handler
        signal.signal(signal.SIGINT, signal.SIG_DFL)
Example #11
    def train(self):
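        """Run the training loop, handling pygame hotkeys for saving/loading the model and render control."""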
        with tf.Session() as sess:
            sess.run(self._model.var_init)
            game_runner = GameRunner(sess, self._model, self._env, self._memory, max_eps=1.0, min_eps=1e-1, decay=1e-4, gamma=0.97)
            game_runner.reset()
            i = 0

            while True:
                if not self._headless:
                    events = pygame.event.get()
                    for event in events:
                        if event.type == pygame.KEYDOWN:
                            if event.key == pygame.K_ESCAPE:
                                self.exit()
                                break
                            elif event.key == pygame.K_s:
                                self._last_model_idx += 1
                                self.save_model(sess, self._last_model_idx)
                            elif event.key == pygame.K_l:
                                self.load_model(sess, self._last_model_idx)
                            elif event.key == pygame.K_d:
                                self._render = not self._render
                            elif event.key == pygame.K_PAGEUP:
                                self._clock_tick += self._clock_tick_min
                                self._clock_tick = self._clock_tick if self._clock_tick <= self._clock_tick_max else self._clock_tick_max
                                print(f'\nNew clock tick: {self._clock_tick}')
                            elif event.key == pygame.K_PAGEDOWN:
                                self._clock_tick -= self._clock_tick_min
                                self._clock_tick = self._clock_tick if self._clock_tick >= self._clock_tick_min else self._clock_tick_min
                                print(f'\nNew clock tick: {self._clock_tick}')
                            elif event.key == pygame.K_r:
                                self._focus_high_scores = not self._focus_high_scores

                gr_latest = game_runner.update(self._render, self._clock_tick)

                if not self._render and self._focus_high_scores and gr_latest.current_score == gr_latest.highest_score - 1:
                    self._render = True

                if gr_latest.game_complete:
                    i += 1
                    game_runner.reset()

                    if self._render and self._focus_high_scores:
                        self._render = False

                print(f'Epochs:{i + 1:,} | Current score:{gr_latest.current_score:,} | '
                      f'Highest score:{gr_latest.highest_score:,} | Current time:{gr_latest.current_time:,.2f}s | '
                      f'Average score:{gr_latest.average_score:,.2f} | Average reward:{gr_latest.average_reward:,.2f} | '
                      f'Average time:{gr_latest.average_time:,.2f}s | Current ε:{gr_latest.current_eps:,.5f}  ', end='\r')
Example #12
    def train(self):
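        """Train the DQN agent over TOTAL_GAMES games against a random player, then plot score and rerequest curves."""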
        group_score_plot = []
        game_group_indices_for_a_plot = []
        game_indices_for_a_plot = []
        rerequest_plot = []
        group_rerequest_plot = []

        counter_games = 0
        group_size = 10
        group_score = 0
        group_rerequest_score = 0

        while counter_games < TOTAL_GAMES:
            # Collecting data for group plots.
            if counter_games % group_size == 0:
                game_group_indices_for_a_plot.append(counter_games)
                group_score_plot.append(group_score)
                group_score = 0
                group_rerequest_plot.append(group_rerequest_score)
                group_rerequest_score = 0

            # Initialize classes
            # The epsilon argument is set to give randomness to actions.
            self.player = DQNPlayer(self.agent, EPSILON_START - counter_games,
                                    REREQUEST_TRESHOLD)

            # Configure a game (DQN AI vs. Random) without any presentation.
            game_runner = GameRunner(game_presenter=None,
                                     player1=self.player,
                                     player2=RandomPlayer())
            self.state_old = self.agent.get_state(game_runner.game)

            # Configure the DQN-learning callback to run after each game turn.
            game_runner.on_mark_selected = self.on_mark_selected_callback

            # Run the game.
            game_runner.start_game()

            # Fit DQN for new data.
            self.agent.replay_new(self.agent.memory)

            score = self.get_score(game_runner.game, self.player)
            group_score += score
            rerequest_plot.append(self.player.total_rerequest_count)
            group_rerequest_score += self.player.total_rerequest_count

            game_indices_for_a_plot.append(counter_games)
            counter_games += 1
            print(f'{counter_games}/{TOTAL_GAMES}')

        # Save model weights for future use.
        self.agent.model.save_weights(FILENAME)

        # Prepare learning plots.
        sns.set(color_codes=True)

        # Plot 1:
        # - score accumulated for each group_size games.
        # - normalized cell rerequest rate for each group_size games.
        plt.figure(1)
        ax = sns.regplot(np.array([game_group_indices_for_a_plot])[0],
                         np.array([group_score_plot])[0],
                         color="b",
                         x_jitter=.1,
                         line_kws={'color': 'green'})
        ax.set(xlabel='games', ylabel='score')

        group_rerequest_plot = [
            x / max(group_rerequest_plot) for x in group_rerequest_plot
        ]

        ax = sns.regplot(np.array([game_group_indices_for_a_plot])[0],
                         np.array([group_rerequest_plot])[0],
                         color="r",
                         x_jitter=.1,
                         line_kws={'color': 'blue'})
        ax.set(xlabel='games', ylabel='rerequest')

        # Plot 2: plain rerequest count plot.
        plt.figure(2)
        bx = sns.regplot(np.array([game_indices_for_a_plot])[0],
                         np.array([rerequest_plot])[0],
                         color="b",
                         x_jitter=.1,
                         line_kws={'color': 'green'})
        bx.set(xlabel='games', ylabel='rerequest_count')

        # Show plots.
        plt.show()
Example #13
    def move(self, game_state):
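        """Prompt the human player for a move and return the corresponding MoveAction or SpinAction."""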
        print game_state
        inp = raw_input(
            self.player +
            ' turn. Enter <row col action> (action is N/NE/E/SE/S/SW/W/NW/SPIN): '
        )
        row, col, action = inp.split()
        row = int(row)
        col = int(col)
        action = action.upper()
        res = []
        if (action == 'SPIN'):
            res = SpinAction(row, col)
        else:
            direction_idx = DIRECTIONS.index(Direction(action, (0, 0)))
            res = MoveAction(row, col, DIRECTIONS[direction_idx])
        return res

    def setup(self, player, game_state, turn_time_limit, setup_time_limit):
        self.player = player


agents = {}
agents[WHITE] = AlphaBetaAgent()
agents[BLACK] = DummyAgent()

state = LinesOfActionState(6, 50)

winner = GameRunner(state, agents, 2, 1).run()
print 'Winner:', winner
Example #14
max_rewards = []
mean_rewards = []
median_rewards = []
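# Sweep play/discard threshold combinations for two GreedyAgents and collect max/mean/median rewards.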
for play_threshold in PLAYS:
    max_reward = []
    mean_reward = []
    median_reward = []
    for base_discard in DISCARDS:
        agents = [GreedyAgent(config) for i in range(2)]
        for agent in agents:
            agent.play_threshold = play_threshold
            agent.discard_thresholds = [
                base_discard + i * (play_threshold - base_discard) / 9
                for i in range(9)
            ]
        runner = GameRunner(agents, EPISODES, config)
        runner.run()
        rewards = runner.rewards
        max_reward.append(np.max(rewards))
        mean_reward.append(np.mean(rewards))
        median_reward.append(np.median(rewards))
    max_rewards.append(max_reward)
    mean_rewards.append(mean_reward)
    median_rewards.append(median_reward)

# Plot 3D data
fig = plt.figure()

X = np.array([[PLAYS[i] for j in range(len(DISCARDS))]
              for i in range(len(PLAYS))])
Y = np.array([DISCARDS[:] for i in range(len(PLAYS))])
Example #15
def main():
    """The entry point."""
    game_runner = GameRunner(ConsoleGamePresenter(), ConsoleGamePlayer(),
                             ConsoleGamePlayer())
    game_runner.start_game()
Example #16
    def reset(self):
        """Create a fresh GameRunner and set the history position to its current count."""
        self.game_runner = GameRunner()
        self.history_pos = self.game_runner.count
Example #17
import gym
import environments
from agents.agent_q import AgentQ
from game_runner import GameRunner

# Make a gym
env = gym.make('LetterNoose-v0')
env.reset()

# Make an agent
agent = AgentQ(env, 27**3)

# Run games
game = GameRunner(env, agent)
game.trainEpisodes(300)

# View stats
game.printTrainStats()