def setup_agents(self):
    """
        Prepare every agent of the environment for the simulation.

        Each agent is first set up through ``agent.setup``. When a file to load
        is configured (``self.file_to_load`` is not empty), exploration is
        stopped if requested and the model / weights / memory selected by the
        ``load_*`` options are loaded from that file. Finally, experience
        collection is stopped when ``self.collect_experiences`` is False.
    """
    setup_msg = 'sim_id: {}, Setup Agents'.format(global_homing.simulation_id)
    debug_homing.xprint(msg=setup_msg)

    # Every option below carries the same name as the agent method performing
    # the corresponding load, so one tuple drives both lookups.
    # Order: model, full weights, h1, h1+h2, h2, output, h2+output, h1+output.
    loader_names = (
        "load_model",
        "load_full_weights",
        "load_h1_weights",
        "load_h1h2_weights",
        "load_h2_weights",
        "load_out_weights",
        "load_h2out_weights",
        "load_h1out_weights",
    )

    for agent in self.environment.agents:
        # Location and brain of the agent
        agent.setup(training=self.training, random_agent=self.random_agent)

        if self.file_to_load != "":
            loading_msg = 'sim_id: {}, Loadind File Setup'.format(global_homing.simulation_id)
            debug_homing.xprint(msg=loading_msg)

            if not self.exploration:
                agent.stop_exploring()

            # Model and weights: run each loader whose option is enabled
            for loader_name in loader_names:
                if getattr(self, loader_name):
                    getattr(agent, loader_name)(self.file_to_load)

            # Memory: -1 means "do not load any memory"
            if self.load_memory != -1:
                agent.load_mem(self.file_to_load, self.load_memory)

        # Experience collection
        if not self.collect_experiences:
            agent.stop_collect_experiences()
def __init__(self, sim_param=None, sim_dir="./simulation_data/default/", sim_suffix=""):
    """
        Build the testbed from the simulation parameters.

        :param sim_param: object exposing the simulation options as attributes.
            Boolean options are enabled only when their value is the string
            'True'; numeric options are converted with int() / float().
        :param sim_dir: directory of the simulation records, kept in
            ``self.simulation_dir``
        :param sim_suffix: suffix kept in ``self.suffix``
    """

    def as_flag(option):
        # An option is on only when it holds the exact text 'True'
        return getattr(sim_param, option) == 'True'

    def as_int(option):
        return int(getattr(sim_param, option))

    # -------------------- Simulation Parameters ----------------------
    self.render = as_flag('render')
    # Events can be handled only when rendering is enabled
    self.can_handle_events = as_flag('render')

    global_homing.debug = as_flag('debug')
    global_homing.record = as_flag('record')
    if global_homing.record:
        record_msg = "Recording to directory: {}".format(sim_dir)
        debug_homing.xprint(color=PRINT_GREEN, msg=record_msg)

    self.fixed_ur_timestep = as_flag('fixed_ur_timestep')
    self.training = as_flag('training')
    self.exploration = as_flag('exploration')
    self.collect_experiences = as_flag('collect_experiences')
    self.max_timesteps = as_int('max_timesteps')
    self.max_training_it = as_int('max_training_it')
    self.random_agent = as_flag('random_agent')

    # What to load from `file_to_load`
    self.load_model = as_flag('load_model')
    self.load_full_weights = as_flag('load_full_weights')
    self.load_h1h2_weights = as_flag('load_h1h2_weights')
    self.load_h1_weights = as_flag('load_h1_weights')
    self.load_h2_weights = as_flag('load_h2_weights')
    self.load_out_weights = as_flag('load_out_weights')
    self.load_h2out_weights = as_flag('load_h2out_weights')
    self.load_h1out_weights = as_flag('load_h1out_weights')
    self.load_memory = as_int('load_mem')
    self.file_to_load = sim_param.file_to_load

    self.suffix = sim_suffix

    # Saving frequencies and thresholds
    self.save_network_freq = as_int('save_network_freq')
    self.save_network_freq_training_it = as_int('save_network_freq_training_it')
    self.save_memory_freq = as_int('save_memory_freq')
    self.start_save_nn_from_it = as_int('start_save_nn_from_it')
    self.wait_learning_score_and_save_model = float(sim_param.wait_learning_score_and_save_model)
    self.record_ls = as_flag('record_ls')

    # Environment
    self.environment = EnvironmentHoming(
        render=self.render,
        fixed_ur_timestep=self.fixed_ur_timestep,
    )

    # State of the simulation
    self.running = True
    self.pause = False

    # Directories and file naming (sub-directories start out empty)
    self.simulation_dir = sim_dir
    self.simfile_suffix = "homing"
    self.simlogs_dir = self.brain_dir = self.ls_dir = ""

    # Mean score curve (sliding window of the rewards) with respect to timestep
    self.learning_scores = []

    # Counters
    self.goal_reached_count = self.collision_count = self.best_ls = 0
def end_simulation(self):
    """
        Wrap up the current simulation; last function called before it ends.

        Closes the simulation log file when recording is on, plots the
        learning scores when ``self.record_ls`` is set (without saving a png
        or a csv), prints the debug event dictionary in debug mode and
        reports the highest learning score.
    """
    exit_msg = 'sim_id: {}, Exit'.format(global_homing.simulation_id)
    debug_homing.xprint(msg=exit_msg)

    # Recording: release the log file
    if global_homing.record:
        global_homing.simlogs_fo.close()
        debug_homing.xprint(msg="Stop recording")

    # Learning scores
    if self.record_ls:
        self.plot_learning_scores(save_png=False, save_csv=False)

    # Debug information
    if global_homing.debug:
        print(debug_homing.dico_event)

    best_ls_msg = "At simID: {}, highest ls: {}".format(global_homing.simulation_id, self.best_ls)
    print(best_ls_msg)
def setup_simulation(self, sim_id=1, file_to_load="", h1=-1, h2=-1):
    """
        Prepare the testbed for a new simulation run.

        Resets the per-run state, opens the csv log file when recording is
        enabled, computes the brain / learning-score directories of this run
        and sets up the agents.

        :param sim_id: ID of the simulation, stored in ``global_homing.simulation_id``
        :param file_to_load: when not empty, overrides ``self.file_to_load``
        :param h1: not used by this method (kept for interface compatibility)
        :param h2: not used by this method (kept for interface compatibility)
    """
    if file_to_load != "":
        self.file_to_load = file_to_load

    # Set ID of simulation
    global_homing.simulation_id = sim_id

    debug_homing.xprint(msg='sim_id: {}, Starting Setup'.format(sim_id))

    # Variables
    self.running = True
    self.pause = False
    # Mean score curve (sliding window of the rewards) with respect to timestep
    self.learning_scores = []
    self.goal_reached_count = 0
    self.collision_count = 0

    # Record simulation
    self.simlogs_dir = self.simulation_dir + "sim_logs/"
    if global_homing.record:
        debug_homing.xprint(msg="sim_id: {}, Start recording".format(sim_id))
        filename = global_homing.fileCreate(
            dir=self.simlogs_dir,
            suffix=self.simfile_suffix + '_sim' + str(global_homing.simulation_id) + '_' + self.suffix,
            extension=".csv",
        )
        # newline='' is required for files handed to csv.writer: the writer
        # emits its own line terminators, and newline translation would
        # otherwise produce '\r\r\n' row endings on Windows.
        # The file stays open for the whole run; it is closed when recording stops.
        global_homing.simlogs_fo = open(filename, 'a', newline='')
        global_homing.simlogs_writer = csv.writer(global_homing.simlogs_fo)

    # Brain directory
    self.brain_dir = self.simulation_dir + "brain_files/" + str(global_homing.simulation_id) + "/"

    # Learning score directory
    self.ls_dir = self.simulation_dir + "ls_files/" + str(global_homing.simulation_id) + "/"

    # Setup agents
    self.setup_agents()

    debug_homing.xprint(msg='sim_id: {}, Setup complete, Start simulation'.format(sim_id))
def handle_events(self):
    """
        Check and handle the event queue.

        Does nothing when ``self.can_handle_events`` is False. Bindings:
            window close / ESC: quit the simulation
            P: pause / resume
            R: start / stop recording
            S: save model and memory of the first agent
            B: load model and stop training (first agent)
            L: load full weights and stop training (first agent)
            W: load h1 weights and stop training (first agent)
            Z: load h1 h2 weights and stop training (first agent)
    """
    if not self.can_handle_events:
        return

    # Keys loading something from `self.file_to_load` into the first agent,
    # mapped to the name of the agent method performing the load
    load_keys = {
        K_b: 'load_model',
        K_l: 'load_full_weights',
        K_w: 'load_h1_weights',
        K_z: 'load_h1h2_weights',
    }

    for event in pygame.event.get():
        if event.type == QUIT:
            # The user closed the window
            self.running = False
            continue

        if event.type != KEYDOWN:
            continue

        key = event.key

        if key == K_ESCAPE:
            # ESC: quit
            self.running = False

        elif key == K_p:
            # P: pause / resume
            self.pause = not self.pause
            if self.pause:
                debug_homing.xprint(msg='sim_id: {}, Paused simulation'.format(global_homing.simulation_id))

        elif key == K_r:
            # R: record / stop record
            if not global_homing.record:
                debug_homing.xprint(msg="Start recording")
                global_homing.record = True
                # NOTE(review): unlike setup_simulation, no run suffix and no
                # explicit extension are passed to fileCreate here — confirm
                # whether that is intended.
                filename = global_homing.fileCreate(
                    dir=self.simlogs_dir,
                    suffix=self.simfile_suffix + '_sim' + str(global_homing.simulation_id) + '_',
                )
                # The log file and its writer must live in `simlogs_fo` /
                # `simlogs_writer`: those are the names set by
                # setup_simulation and closed when recording stops. The
                # writer is built on the file opened just above, not on a
                # previous (possibly closed) handle.
                # newline='' is required for files handed to csv.writer.
                global_homing.simlogs_fo = open(filename, 'a', newline='')
                global_homing.simlogs_writer = csv.writer(global_homing.simlogs_fo)
                # Legacy names previously assigned here, kept as aliases in
                # case other code still reads them.
                global_homing.fo = global_homing.simlogs_fo
                global_homing.writer = global_homing.simlogs_writer
            else:
                debug_homing.xprint(msg="Stop recording")
                global_homing.record = False
                global_homing.simlogs_fo.close()

        elif key == K_s:
            # S: save model and memory
            self.environment.agents[0].save_brain(dir=self.brain_dir, suffix=self.suffix)

        elif key in load_keys:
            # B / L / W / Z: load from file, then stop training
            agent = self.environment.agents[0]
            getattr(agent, load_keys[key])(self.file_to_load)
            agent.stop_training()
# Simulation suffix and directory for records simulation_suffix = simulation_suffix(simulation_parameters) simulation_directory = simulation_dir(simulation_parameters) # include simulation suffix # Keep track of total number of timesteps of all simulations total_timesteps = 0 # -------------------- Simulation ---------------------- # Create Testbed testbed = TestbedHoming(sim_param=simulation_parameters, sim_dir=simulation_directory, sim_suffix=simulation_suffix) multi_simulation = int(simulation_parameters.multi_simulation) for i in range(multi_simulation): simID = i + 1 debug_homing.xprint(color=PRINT_GREEN, msg="Start Simulation: {}".format(simID)) testbed.setup_simulation(simID) testbed.run_simulation() testbed.end_simulation() total_timesteps += Global.sim_timesteps # Increment total timesteps global_homing.reset_simulation_global() # Reset global variables print("All simulation finished\n" "Number of simulations: {}\n" "Total simulations time: {}\n" "Total timesteps: {}".format(multi_simulation, Global.get_time(), total_timesteps)) print("highest ls", testbed.best_ls)