Example #1
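# Exploration entry point: builds a Robot at (18, 1), marks a fresh Arena as
# fully unexplored, opens the CommMgr link, runs Exploration with its limits
# (300, 3600; presumably coverage and time budgets), then closes the link.
# Assumes the project-local Robot, Arena, CommMgr and Exploration imports.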
def main():
	robot = Robot((18,1), True)
	#robot.set_speed(100)
	
	#_real_map = Arena(robot)
	#ArenaUtils.load_arena_from_file(_real_map, 'map/SampleWeek11.txt')
	_explore_map = Arena(robot)
	_explore_map.set_allunexplored()

	CommMgr.connect()
	_explore = Exploration(_explore_map, robot, 300, 3600)
	_explore.run()
	CommMgr.close()
Example #2
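# Fastest-path entry point: loads a known map from file, blocks until an
# 'FP_START' command arrives over CommMgr, acknowledges the Arduino, then runs
# FastestPath through waypoint (7, 10) to the goal. Assumes the Robot, Arena,
# ArenaUtils, CommMgr, FastestPath and ArenaConstant imports from the project.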
def fp():
    robot = Robot((18, 1), True)
    _real_map = Arena(robot)
    ArenaUtils.load_arena_from_file(_real_map, 'map/17_week10.txt')
    print('Awaiting FP_START')
    while True:
        _command = CommMgr.recv()
        if _command == 'FP_START':
            CommMgr.send('X', CommMgr.ARDUINO)
            break

    _go_to_wp_goal = FastestPath(_real_map, robot)
    _status = _go_to_wp_goal.do_fastest_path_wp_goal(
        (7, 10), ArenaConstant.GOAL_POS.value)
Example #3
def explore():
    robot = Robot((18, 1), True)
    _explore_map = Arena(robot)
    _explore_map.set_allunexplored()
    _explore = Exploration(_explore_map, robot, 300, 3600)
    _explore.run()
Example #4
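# Standalone script: parses a machine-specific gin config, then plays a single
# deterministic duel between a RandomAgent and a SingleMCTSAgent in an Arena.
# The commented-out block at the bottom sketches driving SingleMCTS manually
# from a fresh State.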
import gin

from gym_splendor_code.envs.mechanics.abstract_observation import DeterministicObservation
from gym_splendor_code.envs.mechanics.state import State
from nn_models.architectures.average_pool_v0 import StateEncoder, ValueRegressor, IdentityTransformer
gin.parse_config_file(
    '/home/tomasz/ML_Research/splendor/gym-splendor/experiments/MCTS_series_1/params.gin'
)

from agents.random_agent import RandomAgent
from agents.single_mcts_agent import SingleMCTSAgent
from arena.arena import Arena
from monte_carlo_tree_search.evaluation_policies.value_evaluator_nn import ValueEvaluator
from monte_carlo_tree_search.mcts_algorithms.single_process.single_mcts import SingleMCTS

arek = Arena()

a1 = RandomAgent()
a2 = SingleMCTSAgent(5, ValueEvaluator(), 0.6, True, True)
#
results = arek.run_one_duel('deterministic', [a1, a2])

# state1 = State()
# fufu = SingleMCTS(5, 0.6,  ValueEvaluator())
# fufu.create_root(DeterministicObservation(state1))
# fufu.run_mcts_pass()
Example #5
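# Minimal pygame harness: opens a 640x640 window, builds an Arena whose walls
# and "boys" are initialised for the 'steering' method, and defines a
# draw/event loop in main(). Note the snippet never calls main(); see the
# sketch after the loop.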
from arena.arena import Arena
from pygame.locals import *
from arena.wall_initialiser import initialise_walls
from arena.boy_initialiser import initialise_boys
from brains.pathfinding.grid import BackgroundGrid
import pygame

SCREENRECT = Rect(0, 0, 640, 640)

pygame.init()
winstyle = 0
bestdepth = pygame.display.mode_ok(SCREENRECT.size, winstyle, 32)
screen = pygame.display.set_mode((640, 640), winstyle, bestdepth)

arena = Arena(SCREENRECT)

method = 'steering'

initialise_walls(arena, method)
initialise_boys(arena, method)

clock = pygame.time.Clock()


def main(screen, arena):
    while 1:
        for event in pygame.event.get():
            if event.type == QUIT:
                return
        screen.fill((30, 30, 30))
        arena.update_screen_objects(screen)
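# The snippet defines main() but never invokes it, and the draw loop neither
# flips the display nor ticks the clock created above; a minimal entry point
# (an assumption, not part of the original) would be:
if __name__ == '__main__':
    main(screen, arena)
    pygame.quit()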
Example #6
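# StateEncoder: a Keras model assembled from average-pooling encoders for the
# board, the active player and the other player of a Splendor state. It exposes
# value prediction (get_value), training on MCTS-labelled data, weight saving
# and loading, and win-rate checks against easy/medium/hard opponents through
# an Arena, with metrics reported to a Neptune monitor.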
class StateEncoder(AbstractModel):
   def __init__(self,
                gems_encoder_dim : int = None,
                price_encoder_dim : int = None,
                profit_encoder_dim : int = None,
                cards_points_dim: int = None,
                cards_dense1_dim: int = None,
                cards_dense2_dim: int = None,
                board_nobles_dense1_dim : int = None,
                board_nobles_dense2_dim : int = None,
                full_board_dense1_dim: int = None,
                full_board_dense2_dim: int = None,
                player_points_dim: int = None,
                player_nobles_dim: int = None,
                full_player_dense1_dim: int = None,
                full_player_dense2_dim: int = None,
                final_layer= None,
                data_transformer = None,
                network_name: str = None
                ):
       super().__init__()
       self.vectorizer = Vectorizer()
       self.final_layer = final_layer
       self.data_transformer = data_transformer

       self.params['data transformation'] = self.data_transformer.name
       self.params['final layer name'] = self.final_layer.name
       self.params['gems_encoder_dim'] = gems_encoder_dim
       self.params['price_encoder_dim'] = price_encoder_dim
       self.params['profit_encoder_dim'] = profit_encoder_dim
       self.params['cards_points_dim'] = cards_points_dim
       self.params['cards_dense1_dim'] = cards_dense1_dim
       self.params['cards_dense2_dim'] = cards_dense2_dim
       self.params['board_nobles_dense1_dim'] = board_nobles_dense1_dim
       self.params['board_nobles_dense2_dim'] = board_nobles_dense2_dim
       self.params['full_board_dense1_dim']= full_board_dense1_dim
       self.params['full_board_dense2_dim'] = full_board_dense2_dim
       self.params['player_points_dim'] = player_points_dim
       self.params['player_nobles_dim'] = player_nobles_dim
       self.params['full_player_dense1_dim'] = full_player_dense1_dim
       self.params['full_player_dense2_dim']= full_player_dense2_dim


       self.arena = Arena()
       self.network_agent = ValueNNAgent(self)
       self.easy_opp = RandomAgent(distribution='first_buy')
       self.medium_opp = GreedyAgentBoost()
       self.hard_opp = MinMaxAgent()


       self.neptune_monitor = NeptuneMonitor()
       self.network_name = network_name

       self.gems_encoder = GemsEncoder(gems_encoder_dim)
       self.price_encoder = PriceEncoder(price_encoder_dim)
       self.board_encoder = BoardEncoder(self.gems_encoder,
                                          ManyNoblesEncoder(price_encoder_dim,
                                                            board_nobles_dense1_dim,
                                                            board_nobles_dense2_dim),
                                          ManyCardsEncoder(MAX_CARDS_ON_BORD,
                                                           profit_encoder_dim,
                                                           price_encoder_dim,
                                                           cards_points_dim,
                                                           cards_dense1_dim,
                                                           cards_dense2_dim
                                                           ),
                                          full_board_dense1_dim,
                                          full_board_dense2_dim)
       self.player_encoder = PlayerEncoder(self.gems_encoder,
                                            self.price_encoder,
                                            ManyCardsEncoder(MAX_RESERVED_CARDS,
                                                             profit_encoder_dim,
                                                             price_encoder_dim,
                                                             cards_points_dim,
                                                             cards_dense1_dim,
                                                             cards_dense2_dim
                                                             ),
                                            player_points_dim,
                                            player_nobles_dim,
                                            full_player_dense1_dim,
                                            full_player_dense2_dim)
       active_player_input = PlayersInputGenerator('active_').inputs
       other_player_input = PlayersInputGenerator('other_').inputs
       board_input = self.board_encoder.inputs
       self.inputs = board_input + active_player_input + other_player_input
       board_encoded = self.board_encoder(board_input)
       active_player_encoded = self.player_encoder(active_player_input)
       other_player_encoded = self.player_encoder(other_player_input)
       full_state = Concatenate(axis=-1)([board_encoded, active_player_encoded, other_player_encoded])
       full_state = Dense(full_player_dense1_dim, activation='relu')(full_state)
       final_state = Dense(full_player_dense2_dim, activation='relu')(full_state)
       result = self.final_layer(final_state)
       self.layer = Model(inputs = self.inputs, outputs = final_state, name = 'full_state_splendor_estimator')
       self.network = Model(inputs = self.inputs, outputs = result, name = 'full_state_splendor_estimator')
       self.network.compile(Adam(), loss='mean_squared_error')
       self.params['Model name'] = 'Average pooling model'
       self.params['optimizer_name'] = 'Adam'

   def get_value(self, state):
       prediction = self.network.predict(self.vectorizer.state_to_input(state))
       return self.final_layer.get_value(prediction)

   def train_on_mcts_data(self, data_frame, train_epochs:int):
       X = data_frame['state']
       Y = data_frame['mcts_value']
       X = self.vectorizer.many_states_to_input(X)
       Y = self.data_transformer.transform_array(Y)
       fit_history = self.network.fit(X, Y, epochs=train_epochs)
       return fit_history


   def train_network_on_many_sets(self, train_dir=None, validation_file=None, epochs=None, batch_size=None,
                                  test_games=1):
       assert self.network is not None, 'You must create network before training'

       with open(validation_file, 'rb') as f:
           X_val, Y_val = pickle.load(f)

       X_val = self.vectorizer.many_states_to_input(X_val)
       Y_val = self.data_transformer.transform_array(Y_val)
       self.neptune_monitor.reset_epoch_counter()
       file1, file2 = self.gather_data_info(train_dir, validation_file)
       self.start_neptune_experiment(experiment_name=self.network_name, description='Training avg_pool arch network',
                                     neptune_monitor=self.neptune_monitor)
       self.neptune_monitor.log_histograms(file1, file2)
       files_for_training = os.listdir(train_dir)
       for epoch in range(epochs):
           print(f'\n Epoch {epoch}: \n')
           file_epoch = epoch % len(files_for_training)
           X, Y = load_data_for_model(os.path.join(train_dir, files_for_training[file_epoch]))
           X = self.vectorizer.many_states_to_input(X)
           Y = self.data_transformer.transform_array(Y)
           self.network.fit(x=X, y=Y, epochs=1, batch_size=batch_size,
                            validation_data=(X_val, Y_val),
                            callbacks=[self.neptune_monitor])
           del X
           del Y

       neptune.stop()

   def dump_weights(self, file_name):
       self.network.save_weights(file_name)

   def load_weights(self, file_name):
       self.network.load_weights(file_name)

   def gather_data_info(self, train_dir, validation_file):
       list_of_files = os.listdir(train_dir)
       example_file = list_of_files[0]
       with open(os.path.join(train_dir, example_file), 'rb') as f1:
           _, Y_ex = pickle.load(f1)
       with open(validation_file, 'rb') as f2:
           _, Y_val = pickle.load(f2)
       self.params['train set size'] = len(Y_ex)
       self.params['valid set size'] = len(Y_val)
       file1 = os.path.join('temp', 'train_hist.png')
       file2 = os.path.join('temp', 'valid_hist.png')
       Y_ex = self.data_transformer.transform_array(Y_ex)
       Y_val = self.data_transformer.transform_array(Y_val)
       plt.hist(Y_ex, bins=100)
       plt.savefig(file1)
       plt.clf()
       plt.hist(Y_val, bins=100)
       plt.savefig(file2)
       return file1, file2

   def check_performance(self, n_games, opponents):
       performance_results = {}
       if 'easy' in opponents:
           easy_results = self.arena.run_many_duels('deterministic', [self.network_agent, self.easy_opp], n_games,
                                                    shuffle_agents=True)
           _, _, easy_win_rate = easy_results.return_stats()
           performance_results['easy'] = easy_win_rate / n_games
       if 'medium' in opponents:
           medium_results = self.arena.run_many_duels('deterministic', [self.network_agent, self.medium_opp], n_games,
                                                    shuffle_agents=True)
           _, _, medium_win_rate = medium_results.return_stats()
           performance_results['medium'] = medium_win_rate / n_games
       if 'hard' in opponents:
           hard_results = self.arena.run_many_duels('deterministic', [self.network_agent, self.hard_opp], n_games,
                                                    shuffle_agents=True)
           _, _, hard_win_rate = hard_results.return_stats()
           performance_results['hard'] = hard_win_rate / n_games
       return performance_results

   def run_test(self, n_games):
       results = self.check_performance(n_games, ['easy'])
       self.neptune_monitor.log_win_rates(['easy'], results)

   def evaluate_fixed_states(self):
       results = [self.get_value(f_state) for f_state in list_of_fixes_states]
       self.neptune_monitor.log_state_values(results)
Example #7
    def init_objects(self, robot, map_file):
        self._robot = copy.deepcopy(robot)
        self._real_map = Arena(self._robot)
        ArenaUtils.load_arena_from_file(self._real_map, map_file)

        self._explore_map = Arena(self._robot)
        self._explore_map.set_allunexplored()

        self.BLOCK_SIZE = 30
        self._waypoint = None

        # Simulator Main Window
        self._arena_size = (ArenaConstant.ARENA_COL.value * self.BLOCK_SIZE,
                            ArenaConstant.ARENA_ROW.value * self.BLOCK_SIZE)
        #self._screen = pygame.display.set_mode((self._arena_size[0] * 2, self._arena_size[1]))
        self._screen = pygame.display.set_mode(
            (self._arena_size[0], self._arena_size[1]))
        self._screen.fill((0, 0, 0))

        # Simulator Background
        self._background = pygame.Surface(self._arena_size)
        self._background = self._background.convert()
        self._background.fill((169, 169, 169))

        # Simulator Alerts
        # self._alert = pygame.Surface((9 * self.BLOCK_SIZE, 3 * self.BLOCK_SIZE))
        # self._alert = self._alert.convert()
        # self._alert.fill((255,255,255))
        # pygame.draw.rect(self._alert, (0,0,0), (0, 0, 9 * self.BLOCK_SIZE, 3 * self.BLOCK_SIZE), 1)

        # Simulator Background
        self._menu = pygame.Surface(self._arena_size)
        self._menu = self._menu.convert()
        self._menu.fill((255, 255, 255))

        # Prepare Background and Arena
        for row in range(ArenaConstant.ARENA_ROW.value):
            for col in range(ArenaConstant.ARENA_COL.value):
                if row == 17 and col == 0:
                    pygame.draw.rect(
                        self._background, (0, 100, 0),
                        (col * self.BLOCK_SIZE, row * self.BLOCK_SIZE,
                         self.BLOCK_SIZE * 3, self.BLOCK_SIZE * 3))

                if row == 0 and col == 12:
                    pygame.draw.rect(
                        self._background, (255, 140, 0),
                        (col * self.BLOCK_SIZE, row * self.BLOCK_SIZE,
                         self.BLOCK_SIZE * 3, self.BLOCK_SIZE * 3))

                pygame.draw.rect(self._background, (0, 0, 0),
                                 (col * self.BLOCK_SIZE, row * self.BLOCK_SIZE,
                                  self.BLOCK_SIZE, self.BLOCK_SIZE), 1)

        # Simulator Menu
        # self._large_text = pygame.font.Font('freesansbold.ttf',20)
        # self._coverage_ex_menu_surf, self._coverage_ex_menu_rect = self.text_objects("Coverage-Limited Exploration", self._large_text)
        # self._coverage_ex_menu_rect.center = ((self._arena_size[0] / 2, 3 * self._arena_size[1] / 7))
        # self._coverage_rect = pygame.Rect(self._coverage_ex_menu_rect.left - 15, self._coverage_ex_menu_rect.top - 15, self._coverage_ex_menu_rect.width + self.BLOCK_SIZE, self._coverage_ex_menu_rect.height + self.BLOCK_SIZE)
        # pygame.draw.rect(self._menu, (0,0,0), self._coverage_rect, 3)
        # self._menu.blit(self._coverage_ex_menu_surf, self._coverage_ex_menu_rect)

        # self._ex_menu_surf, self._ex_menu_rect = self.text_objects("Exploration", self._large_text)
        # self._ex_menu_rect.center = ((self._arena_size[0] / 2, self._arena_size[1] / 7))
        # self._ex_rect = pygame.Rect.copy(self._coverage_rect)
        # self._ex_rect.center = self._ex_menu_rect.center
        # pygame.draw.rect(self._menu, (0,0,0), self._ex_rect, 3)
        # self._menu.blit(self._ex_menu_surf, self._ex_menu_rect)

        # self._timed_ex_menu_surf, self._timed_ex_menu_rect = self.text_objects("Timed Exploration", self._large_text)
        # self._timed_ex_menu_rect.center = ((self._arena_size[0] / 2, 2 * self._arena_size[1] / 7))
        # self._timed_rect = pygame.Rect.copy(self._coverage_rect)
        # self._timed_rect.center = self._timed_ex_menu_rect.center
        # pygame.draw.rect(self._menu, (0,0,0), self._timed_rect, 3)
        # self._menu.blit(self._timed_ex_menu_surf, self._timed_ex_menu_rect)

        # self._wp_menu_surf, self._wp_menu_rect = self.text_objects("Add Waypoint", self._large_text)
        # self._wp_menu_rect.center = ((self._arena_size[0] / 2, 4 * self._arena_size[1] / 7))
        # self._wp_rect = pygame.Rect.copy(self._coverage_rect)
        # self._wp_rect.center = self._wp_menu_rect.center
        # pygame.draw.rect(self._menu, (0,0,0), self._wp_rect, 3)
        # self._menu.blit(self._wp_menu_surf, self._wp_menu_rect)

        # self._fp_menu_surf, self._fp_menu_rect = self.text_objects("Fastest Path", self._large_text)
        # self._fp_menu_rect.center = ((self._arena_size[0] / 2, 5 * self._arena_size[1] / 7))
        # self._fp_rect = pygame.Rect.copy(self._coverage_rect)
        # self._fp_rect.center = self._fp_menu_rect.center
        # pygame.draw.rect(self._menu, (0,0,0), self._fp_rect, 3)
        # self._menu.blit(self._fp_menu_surf, self._fp_menu_rect)

        # self._mdf_menu_surf, self._mdf_menu_rect = self.text_objects("Generate MDF", self._large_text)
        # self._mdf_menu_rect.center = ((self._arena_size[0] / 2, 6 * self._arena_size[1] / 7))
        # self._mdf_rect = pygame.Rect.copy(self._coverage_rect)
        # self._mdf_rect.center = self._mdf_menu_rect.center
        # pygame.draw.rect(self._menu, (0,0,0), self._mdf_rect, 3)
        # self._menu.blit(self._mdf_menu_surf, self._mdf_menu_rect)

        self._screen.blit(self._background, (0, 0))
        #self._screen.blit(self._alert, (3 * self.BLOCK_SIZE, 8 * self.BLOCK_SIZE))
        self._screen.blit(self._menu,
                          (ArenaConstant.ARENA_COL.value * self.BLOCK_SIZE, 0))
        pygame.display.update()
Example #8
    def __init__(self, environment_id='gym_splendor_code:splendor-v0'):

        self.environment_id = environment_id
        self.progress_bar = None
        self.local_arena = Arena()
Example #9
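# MPI driver script: depending on the `do` switch it times an evaluation of
# ValueFunctionOptimizer metrics, runs 100 multi-threaded duels between
# GreedyAgentBoost and ValueFunctionAgent, or renders a single duel between
# RandomAgent and ValueFunctionAgent. Only the rank-0 process prints results.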
main_process = MPI.COMM_WORLD.Get_rank() == 0

do = 1

if do == 1:
    moominer = ValueFunctionOptimizer()
    time_s = time.time()
    val = moominer.eval_metrics(100)
    if main_process:
        print(f'Time taken = {time.time() - time_s}')
        print(f'value  = {val}')

if do == 2:
    a1 = GreedyAgentBoost()
    a2 = ValueFunctionAgent()
    arek = ArenaMultiThread()
    res = arek.run_many_games('deterministic', [a1, a2], 100)
    if main_process:
        print(res)

if do == 3:
    from agents.random_agent import RandomAgent
    from agents.value_function_agent import ValueFunctionAgent

    a1 = RandomAgent()
    a2 = ValueFunctionAgent()

    from arena.arena import Arena

    arek = Arena()
    arek.run_one_duel('deterministic', [a1, a2], render_game=True)
Example #10
 def __init__(self, alpha):
     self.agent = QValueAgent()
     self.env = gym_open_ai.make('splendor-v0')
     self.weights_token = 'weights_' + str(random.randint(0,1000000)) + '.h5'
     self.arena = Arena()
     self.alpha = alpha
Example #11
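# ArenaMultiThread: wraps a local Arena and spreads pairwise duels between two
# agent groups over MPI ranks. Each rank plays its share of the job list and
# records results in GameStatisticsDuels; rank 0 gathers and merges them. It
# relies on module-level MPI helpers (comm, my_rank, n_proc, main_thread) that
# are not shown in this snippet.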
class ArenaMultiThread:
    def __init__(self, environment_id='gym_splendor_code:splendor-v0'):

        self.environment_id = environment_id
        self.progress_bar = None
        self.local_arena = Arena()

    def create_progress_bar(self, length):
        if main_thread and USE_TQDM:
            self.progress_bar = tqdm(total=length, postfix=None)

    def set_progress_bar(self, value):
        if main_thread and USE_TQDM:
            self.progress_bar.n = min(value, self.progress_bar.total - 1)
            self.progress_bar.update()

    def start_collecting_states(self):
        self.local_arena.start_collecting_states()

    def collect_only_from_middle_game(self, n_min_actions, dump_probability):
        self.local_arena.collect_only_from_middle_game(n_min_actions,
                                                       dump_probability)

    def stop_collecting_states(self):
        self.local_arena.stop_collecting_states()

    def dump_collected_states(self, filename, folder):
        self.local_arena.dump_collected_states(filename, folder, my_rank)

    def return_collected_states(self):
        return self.local_arena.collect_states_df

    def collected_states_to_csv(self, filename):
        self.local_arena.collected_states_to_csv(filename, my_rank)

    def one_group_vs_other_duels(self,
                                 mode,
                                 list_of_agents1: List[Agent],
                                 list_of_agents2: List[Agent],
                                 games_per_duel: int,
                                 shuffle: bool = True):

        #create all pairs to fight
        all_pairs = list(product(list_of_agents1, list_of_agents2))
        pairs_to_duel = [pair for pair in all_pairs if pair[0] != pair[1]]
        #create list of jobs:
        list_of_jobs = pairs_to_duel * games_per_duel
        #calculate jobs per thread:
        jobs_per_thread = int(len(list_of_jobs) / n_proc)
        remaining_jobs = len(list_of_jobs) % n_proc
        #create local arena
        local_results = GameStatisticsDuels(list_of_agents1, list_of_agents2)
        add_remaining_job = int(my_rank < remaining_jobs)

        #create progress bar
        self.create_progress_bar(len(list_of_jobs))

        for game_id in range(0, jobs_per_thread + add_remaining_job):
            if main_thread:
                pass
                #print(f'game_id = {game_id}')
            pair_to_duel = list_of_jobs[game_id * n_proc + my_rank]
            if shuffle:
                starting_agent_id = random.choice(range(2))
            one_game_results = self.local_arena.run_one_duel(
                mode, list(pair_to_duel))
            local_results.register(one_game_results)
            if main_thread:
                self.set_progress_bar((game_id + 1) * n_proc)

        #gather all results
        cumulative_results_unprocessed = comm.gather(local_results, root=0)
        if main_thread:
            cumulative_results = GameStatisticsDuels(list_of_agents1,
                                                     list_of_agents2)
            for one_thread_results in cumulative_results_unprocessed:
                cumulative_results.register(one_thread_results)

            return cumulative_results

    def run_many_games(self, mode, list_of_agents, n_games):
        return self.one_group_vs_other_duels(mode, [list_of_agents[0]],
                                             [list_of_agents[1]],
                                             games_per_duel=n_games,
                                             shuffle=True)

    def all_vs_all(self, mode, list_of_agents, n_games):
        return self.one_group_vs_other_duels(mode,
                                             list_of_agents,
                                             list_of_agents,
                                             games_per_duel=n_games)
Example #12
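# QLearningTrainer: plays Splendor games in the gym environment with a
# QValueAgent, bootstraps per-state target values via new_value_formula,
# collects (state vector, value) rows for training, and periodically retrains
# on an experience-replay buffer and benchmarks the agent in an Arena
# (optionally logging to Neptune).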
class QLearningTrainer:

    def __init__(self, alpha):
        self.agent = QValueAgent()
        self.env = gym_open_ai.make('splendor-v0')
        self.weights_token = 'weights_' + str(random.randint(0,1000000)) + '.h5'
        self.arena = Arena()
        self.alpha = alpha


    def _set_token(self, token):
        self.weights_token = token

    def _get_token(self):
        return self.weights_token

    def _save_weights(self):
        self.agent.model.save_weights(self.weights_token)

    def _load_weights(self):
        self.agent.model.load_weights(self.weights_token)

    def new_value_formula(self, old_value, best_value, winner_id, reward, alpha):
        if winner_id is not None:
            return reward
        if winner_id is None:
            if old_value is not None and best_value is not None:
                return (1-alpha)*old_value + alpha*best_value
            else:
                return None


    def run_one_game_and_collect_data(self, debug_info=True):

        there_was_no_action = False
        self.agent.train_mode()
        last_actual_player_0 = None
        last_actual_player_1 = None
        last_state_player_0 = None
        last_state_player_1 = None
        last_action_vec_player_0 = None
        last_action_vec_player_1 = None
        old_value = None
        old_state = None
        old_action_vec = None
        self.env.reset()
        observation = self.env.show_observation('deterministic')
        is_done = False
        number_of_moves = 0

        debug_collected_data = pd.DataFrame(columns=('active_player_id', 'winner_id', 'reward', 'best_value'))
        collected_data = pd.DataFrame(columns=('state_as_vector', 'value'))
        extra_move_done = False

        while not (is_done and extra_move_done) and number_of_moves < MAX_NUMBER_OF_MOVES:

            if is_done:
                extra_move_done = True

            current_state_as_dict = StateAsDict(self.env.current_state_of_the_game)

            actual_action, actual_eval, best_eval = self.agent.choose_action(observation, [None])
            if actual_action is None:
                there_was_no_action = True
                break
            #print('best value = {}'.format(best_value))
            observation, reward, is_done, info = self.env.step('deterministic', actual_action)
            previous_player_id = self.env.previous_player_id()
            winner_id = info['winner_id']



            if previous_player_id == 0:
                old_value = last_actual_player_0
                old_state = last_state_player_0
                old_action_vec = last_action_vec_player_0

            if previous_player_id == 1:
                old_value = last_actual_player_1
                old_state = last_state_player_1
                old_action_vec = last_action_vec_player_1

            if debug_info:
                state_status = old_state.__repr__() if old_state is not None else 'NONE'
                state_vector = vectorize_state(old_state) if old_state is not None else 'NONE'
                debug_collected_data = debug_collected_data.append({
                                                        'state_ex' : state_status,
                                                        'state_vec' : state_vector,
                                                        'new_value': self.new_value_formula(old_value, best_eval,
                                                                                            winner_id, reward, self.alpha),
                                                        'active_player_id' : self.env.previous_player_id(),
                                                        'winner_id' : winner_id,
                                                        'reward' : reward,
                                                        'best_eval' : best_eval,
                                                        'actual_eval' : actual_eval,
                                                        'old_value': old_value,
                                                        'pa_points' : self.env.previous_players_hand().number_of_my_points()},
                                                        ignore_index=True)


            if old_state is not None:
                collected_data = collected_data.append({'state_as_vector' : vectorize_state(old_state),
                                                        'action_vector' : old_action_vec,
                                                            'value': self.new_value_formula(old_value, best_eval,
                                                                                                winner_id, reward, self.alpha)},
                                                       ignore_index=True)



            if previous_player_id == 0:
                last_actual_player_0 = actual_eval
                last_state_player_0 = current_state_as_dict
                last_action_vec_player_0 = vectorize_action(actual_action)
            if previous_player_id == 1:
                last_actual_player_1 = actual_eval
                last_state_player_1 = current_state_as_dict
                last_action_vec_player_1 = vectorize_action(actual_action)

            #let the opponent move:
            number_of_moves += 1

        if debug_info:
            debug_collected_data.to_csv('debug_info.csv')
        collected_data = collected_data.iloc[0:]
        self.agent.test_mode()
        return collected_data, there_was_no_action

    def train_network(self, collected_data, epochs):
        #prepare X and Y for training:
        self.agent.model.train_model(data_frame=collected_data, epochs=epochs)

    def run_test(self, opponent: Agent):
        results = self.arena.run_many_duels('deterministic', [self.agent, opponent], n_games=comm.Get_size(),
                                            n_proc_per_agent=1, shuffle=True)
        if main_process:
            print(results)

    def run_training(self, n_iterations, opponent):

        if USE_NEPTUNE:
            neptune.create_experiment('Q learning alpha = {}'.format(self.alpha))
        experience_replay_buffer = None
        for i in range(n_iterations):
            collected_data, there_was_no_action = self.run_one_game_and_collect_data(debug_info=True)
            if not there_was_no_action:
                self.agent.model.train_model(data_frame=collected_data, epochs=1)
                if experience_replay_buffer is None:
                    experience_replay_buffer = collected_data
                else:
                    experience_replay_buffer = experience_replay_buffer.append(collected_data)
            #Run test
            print('Game number = {}'.format(i))
            if i%20 == 0 and i > 0:
                self.agent.model.train_model(data_frame=experience_replay_buffer, epochs=2)

            if i%100 == 0 and i > 0:
                experience_replay_buffer = None
                print('Clearing buffer')

            if i%10 == 0:
                if USE_NEPTUNE:
                    neptune.send_metric('epsilon', x=self.agent.epsilon)
                results = self.arena.run_many_duels('deterministic', [self.agent, opponent], number_of_games=50)
                print(results)
                if USE_NEPTUNE:
                    for pair in results.data.keys():
                        neptune.send_metric(pair[0] + '_wins', x=i, y=results.data[pair].wins)
                        neptune.send_metric(pair[0] + '_reward', x=i, y=results.data[pair].reward)
                        neptune.send_metric(pair[0] + '_victory_points', x=i, y=results.data[pair].victory_points)



        if USE_NEPTUNE:
            neptune.stop()
Example #13
from agents.random_agent import RandomAgent
from agents.greedy_agent_boost import GreedyAgentBoost

from agents.minmax_agent import MinMaxAgent
from agents.greedysearch_agent import GreedySearchAgent
from arena.arena import Arena

fight_pit = Arena()


# time_profile = cProfile.Profile()
# time_profile.run('fight_pit.run_many_duels([goku, gohan], number_of_games=100)')
# time_profile.dump_stats('optimization1.prof')


n_games = 10

gohan = GreedyAgentBoost(weight = [100,2,2,1,0.1])
print(gohan.name)

goku = RandomAgent(distribution='uniform')
print(fight_pit.run_many_duels("deterministic",[goku, gohan], number_of_games = n_games, shuffle_agents=True))

goku = RandomAgent(distribution='uniform_on_types')
print(fight_pit.run_many_duels("deterministic", [goku, gohan], number_of_games = n_games, shuffle_agents=True))

goku = RandomAgent(distribution = 'first_buy')
print(fight_pit.run_many_duels("deterministic", [goku, gohan], number_of_games = n_games, shuffle_agents=True))


gohan = GreedyAgentBoost(weight = [100,2.5,1.5,1,0.1])
Example #14
from agents.random_agent import RandomAgent
from agents.greedy_agent_boost import GreedyAgentBoost

from arena.arena import Arena

fight_pit = Arena()


def run_comparison(n_games=1000):

    gohan = GreedyAgentBoost()

    goku = RandomAgent(distribution='uniform')
    print(
        fight_pit.run_many_duels([goku, gohan],
                                 number_of_games=n_games,
                                 shuffle_agents=True))

    goku = RandomAgent(distribution='uniform_on_types')
    print(
        fight_pit.run_many_duels([goku, gohan],
                                 number_of_games=n_games,
                                 shuffle_agents=True))

    goku = RandomAgent(distribution='first_buy')
    print(
        fight_pit.run_many_duels([goku, gohan],
                                 number_of_games=n_games,
                                 shuffle_agents=True))

    gohan = GreedyAgentBoost(weight=[100, 2.5, 1.5, 1, 0.1])
Example #15
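# MultiArena: splits n_games across groups of MPI processes ("colors"), gives
# each group its own communicator to run duels in a local Arena, and gathers
# the per-group GameStatisticsDuels into a global result on the root process.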
class MultiArena:
    def __init__(self) -> None:

        self.env_initialized = False
        self.name = 'Multi Process Arena'
        self.collect_states_mode = False
        self.local_arena = Arena()

    def initialize_env(
            self,
            environment_id: str = 'gym_splendor_code:splendor-deterministic-v0'
    ):
        """Arena has its private environment to run the game."""
        self.env = gym_open_ai.make(environment_id)

    # def run_multi_process_self_play(self, mode, agent: Agent, render_game = False):
    #
    #     self.local_arena.run_self_play(mode, agent, render_game=render_game, mpi_communicator=comm)

    def run_many_duels(self,
                       mode,
                       list_of_agents: List[Agent],
                       n_games: int,
                       n_proc_per_agent: int,
                       shuffle: bool = True):

        assert n_games > 0, 'Number of games must be positive.'
        assert len(
            list_of_agents) == 2, 'This method can run on exactly two agents.'

        n_process = comm.Get_size()
        my_rank = comm.Get_rank()
        n_proc_per_agent = max(min(n_proc_per_agent, n_process), 1)

        n_parallel_games = int(n_process / n_proc_per_agent)
        remaining_processes = n_process % n_proc_per_agent
        extra_process_per_game = int(remaining_processes / n_parallel_games)
        remaining_processes_after_all = remaining_processes % n_parallel_games

        colors = []
        for i in range(n_parallel_games):
            if i < remaining_processes_after_all:
                processes_to_add = n_proc_per_agent + extra_process_per_game + 1
                colors += [i] * processes_to_add
            if i >= remaining_processes_after_all:
                processes_to_add = n_proc_per_agent + extra_process_per_game
                colors += [i] * processes_to_add

        my_color = colors[my_rank]
        #set agents colors:
        for agent in list_of_agents:
            agent.set_color(my_color)

        #create communicators:
        new_communicator = comm.Split(my_color)

        #prepare jobs for each group of processes
        n_games_for_one_communicator = int(n_games / n_parallel_games)
        remaining_games = n_games % n_parallel_games

        if my_color < remaining_games:
            my_games = n_games_for_one_communicator + 1
        if my_color >= remaining_games:
            my_games = n_games_for_one_communicator

        local_main = new_communicator.Get_rank() == 0

        if local_main:
            print('My color = {} I have to take = {} games'.format(
                my_color, my_games))
        local_results = GameStatisticsDuels(list_of_agents[:1],
                                            list_of_agents[1:])

        for _ in range(my_games):
            if shuffle:
                starting_agent_id = random.choice(range(2))
            one_game_results = self.local_arena.run_one_duel(
                mode, list_of_agents, mpi_communicator=new_communicator)
            if local_main:
                local_results.register(one_game_results)

        #Gather all results:
        combined_results_list = comm.gather(local_results, root=0)

        if main_process:
            global_results = GameStatisticsDuels(list_of_agents[:1],
                                                 list_of_agents[1:])
            for local_result in combined_results_list:
                global_results.register(local_result)

            return global_results
Example #16
from agents.greedy_agent_boost import GreedyAgentBoost
from agents.random_agent import RandomAgent
from agents.single_mcts_agent import SingleMCTSAgent
from arena.arena import Arena
from monte_carlo_tree_search.evaluation_policies.heura_val import HeuraEvaluator

arek = Arena()
a1 = GreedyAgentBoost()
a2 = SingleMCTSAgent(150, HeuraEvaluator(), 0.4, True, False)

results = arek.run_many_duels('deterministic', [a1, a2], 1, True)
print(results)
Example #17
    def __init__(self) -> None:

        self.env_initialized = False
        self.name = 'Multi Process Arena'
        self.collect_states_mode = False
        self.local_arena = Arena()
Example #18
from agents.random_agent import RandomAgent
from agents.minmax_agent import MinMaxAgent
from agents.greedysearch_agent2 import GreedySearchAgent

from arena.arena import Arena

environment_id = 'gym_splendor_code:splendor-v1'
fight_pit = Arena(environment_id)

goku = RandomAgent(distribution='first_buy')
goku2 = RandomAgent(distribution='uniform')
#gohan = RandomAgent(distribution='uniform_on_types')
#gohan = RandomAgent(distribution='uniform')
#goku = GreedyAgent(weight = 0.3)
gohan = GreedySearchAgent(depth=5)
goku = MinMaxAgent(name="MinMax", depth=3)
gohan.name = "g2"
goku.name = "g1"
# profi = cProfile.Profile()
#
# profi.run('(fight_pit.run_many_duels([goku, gohan], number_of_games=50))')
# profi.dump_stats('profi2.prof')

fight_pit.run_one_duel([goku, gohan], render_game=True)

# time_dupa = time.time()
# for i in range(100):
#     print(i)
#     fight_pit = Arena()
#     fight_pit.run_one_duel([goku, gohan], starting_agent_id=0)
# print(time.time() - time_dupa)
Example #19
   def __init__(self,
                gems_encoder_dim : int = None,
                price_encoder_dim : int = None,
                profit_encoder_dim : int = None,
                cards_points_dim: int = None,
                cards_dense1_dim: int = None,
                cards_dense2_dim: int = None,
                board_nobles_dense1_dim : int = None,
                board_nobles_dense2_dim : int = None,
                full_board_dense1_dim: int = None,
                full_board_dense2_dim: int = None,
                player_points_dim: int = None,
                player_nobles_dim: int = None,
                full_player_dense1_dim: int = None,
                full_player_dense2_dim: int = None,
                final_layer= None,
                data_transformer = None,
                network_name: str = None
                ):
       super().__init__()
       self.vectorizer = Vectorizer()
       self.final_layer = final_layer
       self.data_transformer = data_transformer

       self.params['data transformation'] = self.data_transformer.name
       self.params['final layer name'] = self.final_layer.name
       self.params['gems_encoder_dim'] = gems_encoder_dim
       self.params['price_encoder_dim'] = price_encoder_dim
       self.params['profit_encoder_dim'] = profit_encoder_dim
       self.params['cards_points_dim'] = cards_points_dim
       self.params['cards_dense1_dim'] = cards_dense1_dim
       self.params['cards_dense2_dim'] = cards_dense2_dim
       self.params['board_nobles_dense1_dim'] = board_nobles_dense1_dim
       self.params['board_nobles_dense2_dim'] = board_nobles_dense2_dim
       self.params['full_board_dense1_dim']= full_board_dense1_dim
       self.params['full_board_dense2_dim'] = full_board_dense2_dim
       self.params['player_points_dim'] = player_points_dim
       self.params['player_nobles_dim'] = player_nobles_dim
       self.params['full_player_dense1_dim'] = full_player_dense1_dim
       self.params['full_player_dense2_dim']= full_player_dense2_dim


       self.arena = Arena()
       self.network_agent = ValueNNAgent(self)
       self.easy_opp = RandomAgent(distribution='first_buy')
       self.medium_opp = GreedyAgentBoost()
       self.hard_opp = MinMaxAgent()


       self.neptune_monitor = NeptuneMonitor()
       self.network_name = network_name

       self.gems_encoder = GemsEncoder(gems_encoder_dim)
       self.price_encoder = PriceEncoder(price_encoder_dim)
       self.board_encoder = BoardEncoder(self.gems_encoder,
                                          ManyNoblesEncoder(price_encoder_dim,
                                                            board_nobles_dense1_dim,
                                                            board_nobles_dense2_dim),
                                          ManyCardsEncoder(MAX_CARDS_ON_BORD,
                                                           profit_encoder_dim,
                                                           price_encoder_dim,
                                                           cards_points_dim,
                                                           cards_dense1_dim,
                                                           cards_dense2_dim
                                                           ),
                                          full_board_dense1_dim,
                                          full_board_dense2_dim)
       self.player_encoder = PlayerEncoder(self.gems_encoder,
                                            self.price_encoder,
                                            ManyCardsEncoder(MAX_RESERVED_CARDS,
                                                             profit_encoder_dim,
                                                             price_encoder_dim,
                                                             cards_points_dim,
                                                             cards_dense1_dim,
                                                             cards_dense2_dim
                                                             ),
                                            player_points_dim,
                                            player_nobles_dim,
                                            full_player_dense1_dim,
                                            full_player_dense2_dim)
       active_player_input = PlayersInputGenerator('active_').inputs
       other_player_input = PlayersInputGenerator('other_').inputs
       board_input = self.board_encoder.inputs
       self.inputs = board_input + active_player_input + other_player_input
       board_encoded = self.board_encoder(board_input)
       active_player_encoded = self.player_encoder(active_player_input)
       other_player_encoded = self.player_encoder(other_player_input)
       full_state = Concatenate(axis=-1)([board_encoded, active_player_encoded, other_player_encoded])
       full_state = Dense(full_player_dense1_dim, activation='relu')(full_state)
       final_state = Dense(full_player_dense2_dim, activation='relu')(full_state)
       result = self.final_layer(final_state)
       self.layer = Model(inputs = self.inputs, outputs = final_state, name = 'full_state_splendor_estimator')
       self.network = Model(inputs = self.inputs, outputs = result, name = 'full_state_splendor_estimator')
       self.network.compile(Adam(), loss='mean_squared_error')
       self.params['Model name'] = 'Average pooling model'
       self.params['optimizer_name'] = 'Adam'
Example #20
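# Simulator: pygame front-end for the maze robot. init_objects rebuilds the
# real and exploration Arenas and draws the grid with two 3x3 coloured corner
# zones (presumably start and goal), add_waypoint marks a cell, display_box/ask
# implement a minimal text prompt, and run_exploration launches Exploration
# when RETURN is pressed (ESC or QUIT closes CommMgr and exits).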
class Simulator:
    def __init__(self, real_run):
        self._real_run = real_run
        pygame.init()

    def init_objects(self, robot, map_file):
        self._robot = copy.deepcopy(robot)
        self._real_map = Arena(self._robot)
        ArenaUtils.load_arena_from_file(self._real_map, map_file)

        self._explore_map = Arena(self._robot)
        self._explore_map.set_allunexplored()

        self.BLOCK_SIZE = 30
        self._waypoint = None

        # Simulator Main Window
        self._arena_size = (ArenaConstant.ARENA_COL.value * self.BLOCK_SIZE,
                            ArenaConstant.ARENA_ROW.value * self.BLOCK_SIZE)
        #self._screen = pygame.display.set_mode((self._arena_size[0] * 2, self._arena_size[1]))
        self._screen = pygame.display.set_mode(
            (self._arena_size[0], self._arena_size[1]))
        self._screen.fill((0, 0, 0))

        # Simulator Background
        self._background = pygame.Surface(self._arena_size)
        self._background = self._background.convert()
        self._background.fill((169, 169, 169))

        # Simulator Alerts
        # self._alert = pygame.Surface((9 * self.BLOCK_SIZE, 3 * self.BLOCK_SIZE))
        # self._alert = self._alert.convert()
        # self._alert.fill((255,255,255))
        # pygame.draw.rect(self._alert, (0,0,0), (0, 0, 9 * self.BLOCK_SIZE, 3 * self.BLOCK_SIZE), 1)

        # Simulator Background
        self._menu = pygame.Surface(self._arena_size)
        self._menu = self._menu.convert()
        self._menu.fill((255, 255, 255))

        # Prepare Background and Arena
        for row in range(ArenaConstant.ARENA_ROW.value):
            for col in range(ArenaConstant.ARENA_COL.value):
                if row == 17 and col == 0:
                    pygame.draw.rect(
                        self._background, (0, 100, 0),
                        (col * self.BLOCK_SIZE, row * self.BLOCK_SIZE,
                         self.BLOCK_SIZE * 3, self.BLOCK_SIZE * 3))

                if row == 0 and col == 12:
                    pygame.draw.rect(
                        self._background, (255, 140, 0),
                        (col * self.BLOCK_SIZE, row * self.BLOCK_SIZE,
                         self.BLOCK_SIZE * 3, self.BLOCK_SIZE * 3))

                pygame.draw.rect(self._background, (0, 0, 0),
                                 (col * self.BLOCK_SIZE, row * self.BLOCK_SIZE,
                                  self.BLOCK_SIZE, self.BLOCK_SIZE), 1)

        # Simulator Menu
        # self._large_text = pygame.font.Font('freesansbold.ttf',20)
        # self._coverage_ex_menu_surf, self._coverage_ex_menu_rect = self.text_objects("Coverage-Limited Exploration", self._large_text)
        # self._coverage_ex_menu_rect.center = ((self._arena_size[0] / 2, 3 * self._arena_size[1] / 7))
        # self._coverage_rect = pygame.Rect(self._coverage_ex_menu_rect.left - 15, self._coverage_ex_menu_rect.top - 15, self._coverage_ex_menu_rect.width + self.BLOCK_SIZE, self._coverage_ex_menu_rect.height + self.BLOCK_SIZE)
        # pygame.draw.rect(self._menu, (0,0,0), self._coverage_rect, 3)
        # self._menu.blit(self._coverage_ex_menu_surf, self._coverage_ex_menu_rect)

        # self._ex_menu_surf, self._ex_menu_rect = self.text_objects("Exploration", self._large_text)
        # self._ex_menu_rect.center = ((self._arena_size[0] / 2, self._arena_size[1] / 7))
        # self._ex_rect = pygame.Rect.copy(self._coverage_rect)
        # self._ex_rect.center = self._ex_menu_rect.center
        # pygame.draw.rect(self._menu, (0,0,0), self._ex_rect, 3)
        # self._menu.blit(self._ex_menu_surf, self._ex_menu_rect)

        # self._timed_ex_menu_surf, self._timed_ex_menu_rect = self.text_objects("Timed Exploration", self._large_text)
        # self._timed_ex_menu_rect.center = ((self._arena_size[0] / 2, 2 * self._arena_size[1] / 7))
        # self._timed_rect = pygame.Rect.copy(self._coverage_rect)
        # self._timed_rect.center = self._timed_ex_menu_rect.center
        # pygame.draw.rect(self._menu, (0,0,0), self._timed_rect, 3)
        # self._menu.blit(self._timed_ex_menu_surf, self._timed_ex_menu_rect)

        # self._wp_menu_surf, self._wp_menu_rect = self.text_objects("Add Waypoint", self._large_text)
        # self._wp_menu_rect.center = ((self._arena_size[0] / 2, 4 * self._arena_size[1] / 7))
        # self._wp_rect = pygame.Rect.copy(self._coverage_rect)
        # self._wp_rect.center = self._wp_menu_rect.center
        # pygame.draw.rect(self._menu, (0,0,0), self._wp_rect, 3)
        # self._menu.blit(self._wp_menu_surf, self._wp_menu_rect)

        # self._fp_menu_surf, self._fp_menu_rect = self.text_objects("Fastest Path", self._large_text)
        # self._fp_menu_rect.center = ((self._arena_size[0] / 2, 5 * self._arena_size[1] / 7))
        # self._fp_rect = pygame.Rect.copy(self._coverage_rect)
        # self._fp_rect.center = self._fp_menu_rect.center
        # pygame.draw.rect(self._menu, (0,0,0), self._fp_rect, 3)
        # self._menu.blit(self._fp_menu_surf, self._fp_menu_rect)

        # self._mdf_menu_surf, self._mdf_menu_rect = self.text_objects("Generate MDF", self._large_text)
        # self._mdf_menu_rect.center = ((self._arena_size[0] / 2, 6 * self._arena_size[1] / 7))
        # self._mdf_rect = pygame.Rect.copy(self._coverage_rect)
        # self._mdf_rect.center = self._mdf_menu_rect.center
        # pygame.draw.rect(self._menu, (0,0,0), self._mdf_rect, 3)
        # self._menu.blit(self._mdf_menu_surf, self._mdf_menu_rect)

        self._screen.blit(self._background, (0, 0))
        #self._screen.blit(self._alert, (3 * self.BLOCK_SIZE, 8 * self.BLOCK_SIZE))
        self._screen.blit(self._menu,
                          (ArenaConstant.ARENA_COL.value * self.BLOCK_SIZE, 0))
        pygame.display.update()

    def add_waypoint(self, pos):
        self._waypoint = pos
        pygame.draw.rect(self._background, (0, 0, 128),
                         (pos[1] * self.BLOCK_SIZE, pos[0] * self.BLOCK_SIZE,
                          self.BLOCK_SIZE, self.BLOCK_SIZE))
        pygame.draw.rect(self._background, (0, 0, 0),
                         (pos[1] * self.BLOCK_SIZE, pos[0] * self.BLOCK_SIZE,
                          self.BLOCK_SIZE, self.BLOCK_SIZE), 1)
        self._screen.blit(self._background, (0, 0))
        pygame.display.update()

    def text_objects(self, text, font):
        text_surface = font.render(text, True, (0, 0, 0))
        return text_surface, text_surface.get_rect()

    def get_key(self):
        while 1:
            event = pygame.event.poll()
            if event.type == KEYDOWN:
                return event.key
            else:
                pass

    def display_box(self, screen, message):
        "Print a message in a box in the middle of the screen"
        fontobject = pygame.font.Font(None, 18)
        pygame.draw.rect(screen, (0, 0, 0),
                         ((screen.get_width() / 2) - 100,
                          (screen.get_height() / 2) - 10, 200, 20), 0)
        pygame.draw.rect(screen, (255, 255, 255),
                         ((screen.get_width() / 2) - 102,
                          (screen.get_height() / 2) - 12, 204, 24), 1)
        if len(message) != 0:
            screen.blit(fontobject.render(message, 1, (255, 255, 255)),
                        ((screen.get_width() / 2) - 100,
                         (screen.get_height() / 2) - 10))
        pygame.display.flip()

    def ask(self, screen, question):
        "ask(screen, question) -> answer"
        pygame.font.init()
        current_string = []
        self.display_box(screen, question + ": " + ''.join(current_string))
        while 1:
            inkey = self.get_key()
            if inkey == K_BACKSPACE:
                current_string = current_string[0:-1]
            elif inkey == K_RETURN:
                break
            elif inkey == K_MINUS:
                current_string.append("_")
            elif inkey <= 127:
                current_string.append(chr(inkey))
            self.display_box(screen, question + ": " + ''.join(current_string))
        return ''.join(current_string)

    def run_exploration(self, robot, map_file):
        self.init_objects(robot, map_file)
        while True:
            # _mouse = pygame.mouse.get_pos()

            # if 450 + self._ex_rect.left < _mouse[0] < 450 + self._ex_rect.right and self._ex_rect.top < _mouse[1] < self._ex_rect.bottom:
            # 	pygame.draw.rect(self._menu, (255,0,0), self._ex_rect, 3)
            # else:
            # 	pygame.draw.rect(self._menu, (0,0,0), self._ex_rect, 3)

            # if 450 + self._timed_rect.left < _mouse[0] < 450 + self._timed_rect.right and self._timed_rect.top < _mouse[1] < self._timed_rect.bottom:
            # 	pygame.draw.rect(self._menu, (255,0,0), self._timed_rect, 3)
            # else:
            # 	pygame.draw.rect(self._menu, (0,0,0), self._timed_rect, 3)

            # if 450 + self._coverage_rect.left < _mouse[0] < 450 + self._coverage_rect.right and self._coverage_rect.top < _mouse[1] < self._coverage_rect.bottom:
            # 	pygame.draw.rect(self._menu, (255,0,0), self._coverage_rect, 3)
            # else:
            # 	pygame.draw.rect(self._menu, (0,0,0), self._coverage_rect, 3)

            # if 450 + self._wp_rect.left < _mouse[0] < 450 + self._wp_rect.right and self._wp_rect.top < _mouse[1] < self._wp_rect.bottom:
            # 	pygame.draw.rect(self._menu, (255,0,0), self._wp_rect, 3)
            # else:
            # 	pygame.draw.rect(self._menu, (0,0,0), self._fp_rect, 3)

            # if 450 + self._fp_rect.left < _mouse[0] < 450 + self._fp_rect.right and self._fp_rect.top < _mouse[1] < self._fp_rect.bottom:
            # 	pygame.draw.rect(self._menu, (255,0,0), self._fp_rect, 3)
            # else:
            # 	pygame.draw.rect(self._menu, (0,0,0), self._fp_rect, 3)

            # if 450 + self._mdf_rect.left < _mouse[0] < 450 + self._mdf_rect.right and self._mdf_rect.top < _mouse[1] < self._mdf_rect.bottom:
            # 	pygame.draw.rect(self._menu, (255,0,0), self._mdf_rect, 3)
            # else:
            # 	pygame.draw.rect(self._menu, (0,0,0), self._mdf_rect, 3)

            # self._screen.blit(self._menu, (ArenaConstant.ARENA_COL.value * self.BLOCK_SIZE,0))
            # pygame.display.update()

            keys = pygame.key.get_pressed()
            for event in pygame.event.get():
                if event.type == QUIT or keys[pygame.K_ESCAPE]:
                    CommMgr.close()
                    pygame.quit()
                    sys.exit()
                elif keys[pygame.K_RETURN]:
                    self.init_objects(robot, map_file)
                    if self._real_run:
                        CommMgr.connect()

                    _explore = Exploration(self._explore_map, self._real_map,
                                           self._robot, 300, 3600,
                                           (self._screen, self._background))
                    _explore.run()

                    if self._real_run:
                        CommMgr.close()