def main(config):
    """Train an agent on the configured gym environment.

    Evaluates the success rate every 250 episodes, optionally checkpoints
    every 10000 episodes, and optionally saves/plots results at the end.
    """
    env = gym.make(config['env_name'])
    # env.env.reward_type = "dense"
    agent = Agent(env)
    agent.reset()

    rewards = []
    success_rates = []
    n_episodes = config['training_episodes']

    for episode in range(n_episodes):
        # periodic checkpoint
        if config['save_periodically'] and episode > 0 and episode % 10000 == 0:
            AgentUtils.save(agent, rewards, success_rates)

        # periodic evaluation
        if episode > 0 and episode % 250 == 0:
            success_rate = Evaluator.test_agent(env, agent)
            print("Success rate after {} episodes: {}".format(episode,
                                                              success_rate))
            success_rates.append(success_rate)

        # the train flag is raised on every 16th episode only
        total_reward = agent.run(episode % 16 == 0)
        rewards.append(total_reward)

        if config['print_stats']:
            print_episode_stats(episode, n_episodes, total_reward)

    if config['save_experiment']:
        AgentUtils.save(agent, rewards, success_rates)
    if config['make_total_reward_plot']:
        plot_total_rewards(rewards, n_episodes, avg=100)
def main(config):
    """Run the house-energy training loop described by ``config['main']``."""
    cfg = config['main']
    save_experiment = cfg['save_experiment']
    print_stats = cfg['print_stats']
    make_total_reward_plot = cfg['make_total_reward_plot']
    training_episodes = cfg['training_episodes']

    env = HouseEnergyEnvironment(collect_stats=print_stats)
    agent = Agent(env=env)

    model_id = None
    if cfg['load_agent_model']:
        model_id = input('Enter model number to load:\n')
        AgentUtils.load(agent, model_id)

    # --- learning ---
    rewards = []
    for episode in range(training_episodes):
        # checkpoint every 1000 episodes; save() hands back the id to reuse
        if cfg['save_periodically'] and episode > 0 and episode % 1000 == 0:
            model_id = AgentUtils.save(agent, rewards, model_id)

        episode_reward = agent.run()
        rewards.append(episode_reward)
        print("episode {} / {} | Reward: {}".format(episode,
                                                    training_episodes,
                                                    episode_reward))

        if print_stats:
            print_episode_stats(agent.get_episode_stats(),
                                env.get_episode_stats())

    if make_total_reward_plot:
        plot_total_rewards(rewards, training_episodes, avg=10)
    if save_experiment:
        AgentUtils.save(agent, rewards, model_id)
def test_save_index_0(self):
    """Save to the first free index (0 is known to be free) and verify
    the configuration file was written there."""
    AgentUtils.save(self.agent, rewards=None, old_id=self.load_id)

    self.assertTrue(os.path.exists(self.save_path))

    config_path = self.save_path + '/configuration.json'
    with open(config_path) as config_file:
        saved_config = json.load(config_file)
    self.assertTrue(saved_config['test'])
def main():
    """Load a saved agent model chosen by the user and run the
    presentation in an endless loop."""
    env = gym.make("FetchPush-v1")
    # env = gym.make("FetchReach-v1")
    agent = Agent(env)

    chosen_model = input('Model ID:\n')
    agent.reset()
    AgentUtils.load(agent, chosen_model)

    while True:
        _run_presentation(agent, env)
def manual_testing(self):
    """Runs manual testing menu to check project integrity.

    User can choose actions and other menu options, what allows to check
    correct behaviour of environment. Runs in console.
    """
    # current and previous environment state snapshots
    curr_state = last_state = self.env.get_current_state()
    # create len(curr_state) lists for plots - one series per state key
    values_for_plt = [[] for _ in curr_state.keys()]
    step = 0
    # when True, menu/state output is mirrored into the log file
    file_auto_log = False
    log_file = open("Manual_Tests.log", "a")
    while True:
        # Print Main Menu
        print(self._draw_menu(file_auto_log, step))
        # Print State Values
        state_menu = self._draw_state(curr_state, last_state)
        print(state_menu)
        # Update lists for plots
        serialized_state = self.env.serialize_state(curr_state.copy())
        for i in range(len(serialized_state)):
            values_for_plt[i].append(serialized_state[i])
        if file_auto_log:
            log_file.write(state_menu)
        # Selecting option
        try:
            option = input('\nSelect option:\n')
            # options 1..len(actions): execute the chosen env action
            if int(option) in range(1, len(self.actions) + 1):
                last_state = curr_state
                # pass the action with the step & inc step counter
                self.env.step(self.actions[int(option) - 1])
                curr_state = self.env.get_current_state()
                step += 1
                if file_auto_log:
                    log_file.write('\nCurrent step: {0}\n'
                                   'Chosen action: {1}\n'.format(
                                       step, self.actions[int(option) - 1]))
            # toggle logging to file
            elif int(option) == len(self.actions) + 1:
                file_auto_log = not file_auto_log
                if file_auto_log:
                    log_file.write('\n----- Logging ON ----\n\n')
                else:
                    log_file.write('\n----- Logging OFF ----\n\n')
            # plot collected state series, skipping user-given indexes
            elif int(option) == len(self.actions) + 2:
                skip_list = [
                    int(x) for x in input(
                        'Enter indexes separated by space '
                        'which should be skipped on plot:\n').split()
                ]
                for i, key in enumerate(
                        self.env.get_current_state().keys()):
                    if i not in skip_list:
                        plt.plot(values_for_plt[i], label=key)
                plt.legend()
                plt.show()
            # fast-forward: repeat the nop action for the given hours
            elif int(option) == len(self.actions) + 3:
                time = float(input('Pass time in hour:\n'))
                while time - self.env.world.time_step_in_minutes / 60 >= 0:
                    last_state = curr_state
                    # pass the action with the step
                    self.env.step('action_nop')
                    curr_state = self.env.get_current_state()
                    serialized_state = self.env.serialize_state(
                        curr_state.copy())
                    step += 1
                    # update lists for plots
                    for i, val in enumerate(serialized_state):
                        values_for_plt[i].append(val)
                    time -= self.env.world.time_step_in_minutes / 60
                if file_auto_log:
                    # NOTE(review): `time` here is the leftover remainder
                    # after the wait loop, not the hours originally
                    # requested - confirm this is the intended log value
                    log_file.write(
                        '\nCurrent step: {0}\n'
                        'After waiting (nop action) for: {1} hours\n'.
                        format(step, time))
            # load a saved agent model by id
            elif int(option) == len(self.actions) + 4:
                model_id = input('Enter model number to load\n')
                AgentUtils.load(self.agent, model_id)
                print('Model {} was successfully loaded.'.format(
                    str(model_id)))
                if file_auto_log:
                    log_file.write(
                        'Model {} was successfully loaded.'.format(
                            str(model_id)))
            # let the agent greedily pick and execute a single action
            elif int(option) == len(self.actions) + 5:
                last_state = curr_state
                serialized_state = self.env.serialize_state(
                    curr_state.copy())
                # let agent decide here for one action
                action_index = \
                    self.agent.get_next_action_greedy(serialized_state)
                self.env.step(self.actions[action_index])
                curr_state = self.env.get_current_state()
                step += 1
                print('Agent decided to do: {}'.format(
                    self.actions[action_index]))
                if file_auto_log:
                    log_file.write('\nCurrent step: {0}\n'
                                   'Agent decided to do: {1}\n'.format(
                                       step, self.actions[action_index]))
            # reset environment and clear collected plot history
            elif int(option) == len(self.actions) + 6:
                step = 0
                self.env.reset()
                last_state = curr_state = self.env.get_current_state()
                for i in values_for_plt:
                    i.clear()
                if file_auto_log:
                    log_file.write('Reset environment.\n')
            # quit the menu
            elif int(option) == len(self.actions) + 7:
                break
            else:
                raise ValueError()
        except ValueError:
            print("Invalid option!")
            # NOTE(review): indentation reconstructed from a collapsed
            # source - this state dump is placed inside the except block
            # (runs after an invalid selection); confirm it should not
            # sit at loop-body level instead
            if file_auto_log:
                log_file.write(self._draw_state(curr_state, last_state))
    # while end, close file and save logs
    log_file.close()
def test_load_given_id_test(self):
    """Loading an agent by a given id should pick up that model's
    configuration file."""
    AgentUtils.load(self.agent, self.load_id)

    with open(self.load_path + '/configuration.json') as cfg_file:
        loaded_config = json.load(cfg_file)
    self.assertTrue(loaded_config['test'])
def __init__(self, width=None, height=None, model=1):
    """Configuration for simulation object

    This method is divided into two parts, the "view" and the "model",
    roughly resembling view and model responsibilities in MVC model.
    Since pygame doesn't allow complicated design pattern and introduces
    its own event-render-loop mechanism, this is only for clearness.

    Args:
        width (int): simulation width in pixels.
        height (int): simulation height in pixels.
        model (int): number of the saved agent model to be loaded.

    Note:
        fps is not a parameter - it is read from configuration.json.
        To apply fullscreen, simply leave width and height unmodified
        to None. Using different values is discouraged and could
        potentially cause errors.
        If you are using multiple monitors, you NEED to specify window
        width and height, otherwise fullscreen will span across all
        monitors.
    """
    pygame.init()
    pygame.display.set_caption("Press ESC to quit, SPACE to pause")

    # --- view settings ---
    # default to fullscreen; replaced by a windowed double-buffered
    # surface below when explicit dimensions were given
    self.screen = pygame.display.set_mode((0, 0), pygame.FULLSCREEN)
    if width and height:
        self.screen = pygame.display.set_mode((width, height),
                                              pygame.DOUBLEBUF)
    if width and height:
        self.width, self.height = width, height
    else:
        # fullscreen: take the actual surface size
        self.width, self.height = pygame.display.get_surface().get_size()

    self.background = pygame.Surface(self.screen.get_size()).convert()
    self.clock = pygame.time.Clock()

    # open configuration file (one dir up; two when run from tests/)
    add_path = ''
    if 'tests' in os.getcwd():
        add_path = '../'
    with open(add_path + '../configuration.json') as config_file:
        self.config = json.load(config_file)
    self.fps = self.config['simulation']['fps']

    self.font = pygame.font.SysFont('mono', 10, bold=True)
    # presumably holds values to render - populated elsewhere; verify
    self.data = dict()
    self.colors = {
        'bg': pygame.Color('#ececec'),        # lightgrey
        'white': pygame.Color('#ffffff'),
        'weather1': pygame.Color('#f1e2bb'),  # mellow yellow
        'weather2': pygame.Color('#e2ebd1'),  # pastel light green
        'weather3': pygame.Color('#d0dcdc'),  # pastel light blue
        'weather4': pygame.Color('#b4c4c2'),  # pastel dark blue
        'weather5': pygame.Color('#ddcfb3'),  # i dont remember really
        'font': pygame.Color('#9b9b9b'),      # medium grey
        'devices1': pygame.Color('#e3dcbb'),  # dirty yellow light
        'devices2': pygame.Color('#ded5ae'),  # ...
        'devices3': pygame.Color('#d6cb98'),
        'devices4': pygame.Color('#ccbe81'),
        'devices5': pygame.Color('#c4b46c'),  # dirty yellow dark
        'devices0': pygame.Color('#f9f9f9'),  # light grey
        'intense1': pygame.Color('#b77d6a'),  # pastel dark red
        'intense2': pygame.Color('#c79b8c'),  # pastel light red
        'soft1': pygame.Color('#f1e6e2'),     # reddish light grey
        'soft2': pygame.Color('#e3dcbb'),     # yellowish light grey
    }
    # layout margin: 2.5% of the window height
    self.margin = 0.025 * self.height

    # --- model settings ---
    self.env = HouseEnergyEnvironment()
    self.agent = Agent(env=self.env)
    AgentUtils.load(self.agent, model)
    self.actions = self.env.get_actions()
    self.current_state = self.env.reset(
        world=World(time_step_in_minutes=1, duration_days=None)
    )

    # memory for charts - rolling window of the last `maxlen` values
    maxlen = 100
    self.memory = {
        'temperature': {
            'values': deque([0] * 100, maxlen=maxlen),
            'desires': deque([0] * 100, maxlen=maxlen)
        },
        'light': {
            'values': deque([0] * 100, maxlen=maxlen),
            'desires': deque([0] * 100, maxlen=maxlen)
        }
    }
    # zooming for charts
    self.zoom = 0
    # dictionary for colorized icon images
    self.icons = {}