def load_implementation(self, directory: str):
    """Loads a previously trained and saved actor policy from directory.

    The loaded policy may afterwards be used by calling play().

    Args:
        directory: the directory containing the trained policy.

    Raises:
        ValueError: if `directory` is empty/None or is not an existing
            directory.
    """
    # Validate with explicit exceptions rather than assert: assert statements
    # are stripped when Python runs with -O, which would silently skip this
    # input validation.
    if not directory:
        raise ValueError('directory must be a non-empty path')
    if not os.path.isdir(directory):
        raise ValueError('not a directory: {}'.format(directory))
    self._agent = Agent.load(directory)
def restore_agent(self, directory: str, filename: str = None):
    """Restore the strategy's learning agent from a saved checkpoint.

    Arguments:
        directory: The `str` path of the directory the agent checkpoint is
            stored in.
        filename (optional): The `str` path of the file the agent
            specification is stored in. The `.json` file extension will be
            automatically appended if not provided.
    """
    # Rebuild the agent first, then wrap a fresh runner around it so both
    # attributes always refer to the same restored model.
    restored = Agent.load(directory, filename=filename)
    self._agent = restored
    self._runner = Runner(agent=restored, environment=self._environment)
def test_explicit(self):
    """Exercise explicit agent.save() / Agent.load() checkpoint round-trips.

    Two scenarios: (1) a freshly initialized agent is saved, reloaded via
    Agent.load and used for one act/observe step; (2) an agent trained for
    one step with single interactions is saved and restored into an agent
    configured for two parallel interactions via agent.restore().  Each
    scenario deletes the checkpoint files it created and finally removes
    the directory.
    """
    self.start_tests(name='explicit')
    # default
    agent, environment = self.prepare()
    agent.initialize()
    states = environment.reset()
    agent.save(directory=self.__class__.directory)
    agent.close()
    agent = Agent.load(directory=self.__class__.directory)
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    environment.close()
    # Remove every file the save produced; os.rmdir below requires the
    # directory to be empty.
    os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
    os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
    os.rmdir(path=self.__class__.directory)
    self.finished_test()
    # single then parallel
    agent, environment = self.prepare(memory=50, update=dict(unit='episodes', batch_size=1))
    agent.initialize()
    states = environment.reset()
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.save(directory=self.__class__.directory)
    agent.close()
    environment.close()
    agent, environment = self.prepare(
        timestep_range=(6, 10), update=dict(unit='episodes', batch_size=1),
        parallel_interactions=2
    )
    agent.restore(directory=self.__class__.directory)
    states = environment.reset()
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    environment.close()
    # NOTE(review): this scenario expects checkpoint index 1 (agent-1.*),
    # presumably because one timestep was observed before saving — confirm
    # against the saver's checkpoint-numbering scheme.
    os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
    os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
    os.rmdir(path=self.__class__.directory)
    self.finished_test()
def test_config(self):  # FEATURES.MD
    """Exercise saver-config based checkpointing (saver=dict(directory=...)).

    Scenarios: (1) default saver round-trip via Agent.load; (2) checkpoint
    written with single interactions restored into a parallel-interactions
    configuration; (3) saver with load=False, which must NOT restore the
    existing checkpoint.  Each scenario deletes the files it created and
    removes the checkpoint directory.
    """
    self.start_tests(name='config')
    # default
    saver = dict(directory=self.__class__.directory)
    agent, environment = self.prepare(saver=saver)
    agent.initialize()
    states = environment.reset()
    agent.close()
    agent = Agent.load(directory=self.__class__.directory)
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    environment.close()
    os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
    os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
    os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
    # The only remaining file should be the TensorBoard events file; remove
    # it, assert the name, and exit after the first entry.
    for filename in os.listdir(path=self.__class__.directory):
        os.remove(path=os.path.join(self.__class__.directory, filename))
        assert filename.startswith('events.out.tfevents.')
        break
    os.rmdir(path=self.__class__.directory)
    self.finished_test()
    # single then parallel
    saver = dict(directory=self.__class__.directory)
    agent, environment = self.prepare(
        memory=50, update=dict(unit='episodes', batch_size=1), saver=saver
    )
    agent.initialize()
    states = environment.reset()
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    environment.close()
    # Second agent picks up the saver checkpoint on initialize().
    agent, environment = self.prepare(
        timestep_range=(6, 10), update=dict(unit='episodes', batch_size=1),
        saver=saver, parallel_interactions=2
    )
    agent.initialize()
    states = environment.reset()
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    environment.close()
    os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
    os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
    os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-2.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-2.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-2.meta'))
    for filename in os.listdir(path=self.__class__.directory):
        os.remove(path=os.path.join(self.__class__.directory, filename))
        assert filename.startswith('events.out.tfevents.')
        break
    os.rmdir(path=self.__class__.directory)
    self.finished_test()
    # no load
    saver = dict(directory=self.__class__.directory)
    agent, environment = self.prepare(saver=saver)
    agent.initialize()
    states = environment.reset()
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    environment.close()
    # load=False: a fresh model is created even though checkpoints exist.
    saver = dict(directory=self.__class__.directory, load=False)
    agent, environment = self.prepare(saver=saver)
    agent.initialize()
    states = environment.reset()
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    environment.close()
    os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
    os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
    os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
    for filename in os.listdir(path=self.__class__.directory):
        os.remove(path=os.path.join(self.__class__.directory, filename))
        assert filename.startswith('events.out.tfevents.')
        break
    os.rmdir(path=self.__class__.directory)
    self.finished_test()
def test_config_extended(self):
    """Exercise extended saver-config options: filename, frequency, load.

    Scenarios: (1) custom checkpoint filename 'test'; (2) time-based save
    frequency of 1 second (hence the time.sleep(1) calls between steps);
    (3) restoring a specific checkpoint by name via load='agent-0'.
    """
    self.start_tests(name='config extended')
    # filename
    saver = dict(directory=self.__class__.directory, filename='test')
    agent, environment = self.prepare(saver=saver)
    agent.initialize()
    states = environment.reset()
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    agent = Agent.load(directory=self.__class__.directory, filename='test')
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    environment.close()
    # All checkpoint files are named after the custom filename 'test'.
    os.remove(path=os.path.join(self.__class__.directory, 'test.json'))
    os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
    os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-0.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-0.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-0.meta'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-1.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-1.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-1.meta'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-2.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-2.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-2.meta'))
    for filename in os.listdir(path=self.__class__.directory):
        os.remove(path=os.path.join(self.__class__.directory, filename))
        assert filename.startswith('events.out.tfevents.')
        break
    os.rmdir(path=self.__class__.directory)
    self.finished_test()
    # frequency
    saver = dict(directory=self.__class__.directory, frequency=1)
    agent, environment = self.prepare(saver=saver)
    agent.initialize()
    states = environment.reset()
    # Sleep so the 1-second save frequency elapses between interactions and
    # additional checkpoints get written.
    time.sleep(1)
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    time.sleep(1)
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    environment.close()
    os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
    os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
    os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-2.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-2.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-2.meta'))
    for filename in os.listdir(path=self.__class__.directory):
        os.remove(path=os.path.join(self.__class__.directory, filename))
        # The assert message includes the offending filename for debugging.
        assert filename.startswith('events.out.tfevents.'), filename
        break
    os.rmdir(path=self.__class__.directory)
    self.finished_test()
    # load filename
    saver = dict(directory=self.__class__.directory)
    agent, environment = self.prepare(saver=saver)
    agent.initialize()
    states = environment.reset()
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    environment.close()
    # Restore a specific checkpoint by its name rather than the latest.
    saver = dict(directory=self.__class__.directory, load='agent-0')
    agent, environment = self.prepare(saver=saver)
    agent.initialize()
    states = environment.reset()
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    environment.close()
    os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
    os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
    os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
    for filename in os.listdir(path=self.__class__.directory):
        os.remove(path=os.path.join(self.__class__.directory, filename))
        assert filename.startswith('events.out.tfevents.')
        break
    os.rmdir(path=self.__class__.directory)
    self.finished_test()
def load(self, model_name):
    """Restore the PPO agent from an HDF5 checkpoint stored under model_name."""
    print("Loading model...")
    # The checkpoint was written in HDF5 format, so request it explicitly.
    checkpoint_directory = model_name
    self.ppo_agent = Agent.load(directory=checkpoint_directory, format='hdf5')
def test_config(self):  # FEATURES.MD
    """Exercise saver-config based checkpointing (saver=dict(directory=...)).

    Scenarios: (1) default saver round-trip via Agent.load; (2) checkpoint
    from a parallel-interactions agent restored into a single-interaction
    agent; (3) custom filename; (4) time-based save frequency; (5) load=False
    on a deep-copied agent spec; (6) load='agent-0' to restore a specific
    checkpoint.  Each scenario cleans up its files and removes the directory.
    """
    self.start_tests(name='config')
    # default
    saver = dict(directory=self.__class__.directory)
    agent, environment = self.prepare(saver=saver)
    agent.initialize()
    states = environment.reset()
    agent.close()
    agent = Agent.load(directory=self.__class__.directory)
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    environment.close()
    os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
    os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
    os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
    # The only remaining file should be the TensorBoard events file; remove
    # it, assert the name, and exit after the first entry.
    for filename in os.listdir(path=self.__class__.directory):
        os.remove(path=os.path.join(self.__class__.directory, filename))
        assert filename.startswith('events.out.tfevents.')
        break
    os.rmdir(path=self.__class__.directory)
    self.finished_test()
    # parallel then single
    saver = dict(directory=self.__class__.directory)
    agent, environment = self.prepare(
        update=dict(unit='episodes', batch_size=1), saver=saver,
        parallel_interactions=2
    )
    agent.initialize()
    states = environment.reset()
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    # Second agent (single interaction) picks up the saver checkpoint on
    # initialize(); the original environment is reused.
    agent, _ = self.prepare(saver=saver)
    agent.initialize()
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    environment.close()
    os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
    os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
    os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-2.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-2.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-2.meta'))
    for filename in os.listdir(path=self.__class__.directory):
        os.remove(path=os.path.join(self.__class__.directory, filename))
        assert filename.startswith('events.out.tfevents.')
        break
    os.rmdir(path=self.__class__.directory)
    self.finished_test()
    # filename
    saver = dict(directory=self.__class__.directory, filename='test')
    agent, environment = self.prepare(saver=saver)
    agent.initialize()
    states = environment.reset()
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    agent = Agent.load(directory=self.__class__.directory, filename='test')
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    environment.close()
    # Checkpoint files are named after the custom filename 'test'.
    os.remove(path=os.path.join(self.__class__.directory, 'test.json'))
    os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
    os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-0.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-0.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-0.meta'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-1.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-1.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-1.meta'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-2.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-2.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-2.meta'))
    for filename in os.listdir(path=self.__class__.directory):
        os.remove(path=os.path.join(self.__class__.directory, filename))
        assert filename.startswith('events.out.tfevents.')
        break
    os.rmdir(path=self.__class__.directory)
    self.finished_test()
    # frequency
    saver = dict(directory=self.__class__.directory, frequency=1)
    agent, environment = self.prepare(saver=saver)
    agent.initialize()
    states = environment.reset()
    # Sleep so the 1-second save frequency elapses between interactions and
    # additional checkpoints get written.
    time.sleep(1)
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    time.sleep(1)
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    environment.close()
    os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
    os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
    os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-2.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-2.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-2.meta'))
    for filename in os.listdir(path=self.__class__.directory):
        os.remove(path=os.path.join(self.__class__.directory, filename))
        # The assert message includes the offending filename for debugging.
        assert filename.startswith('events.out.tfevents.'), filename
        break
    os.rmdir(path=self.__class__.directory)
    self.finished_test()
    # no load
    saver = dict(directory=self.__class__.directory)
    agent, environment = self.prepare(saver=saver)
    # Deep-copy the un-initialized agent so its saver spec can be mutated
    # independently before its own initialize().
    restored_agent = copy.deepcopy(agent)
    agent.initialize()
    states = environment.reset()
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    # load=False: a fresh model is created even though checkpoints exist.
    restored_agent.model.saver_spec['load'] = False
    restored_agent.initialize()
    actions = restored_agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    # restored_agent.observe(terminal=terminal, reward=reward)
    restored_agent.close()
    environment.close()
    os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
    os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
    os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
    for filename in os.listdir(path=self.__class__.directory):
        os.remove(path=os.path.join(self.__class__.directory, filename))
        assert filename.startswith('events.out.tfevents.')
        break
    os.rmdir(path=self.__class__.directory)
    self.finished_test()
    # load filename
    saver = dict(directory=self.__class__.directory)
    agent, environment = self.prepare(saver=saver)
    restored_agent = copy.deepcopy(agent)
    agent.initialize()
    states = environment.reset()
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    # Restore a specific checkpoint by its name rather than the latest.
    restored_agent.model.saver_spec['load'] = 'agent-0'
    restored_agent.initialize()
    actions = restored_agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    # restored_agent.observe(terminal=terminal, reward=reward)
    restored_agent.close()
    environment.close()
    os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
    os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
    os.remove(path=os.path.join(self.__class__.directory, 'graph.pbtxt'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-0.meta'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent-1.meta'))
    for filename in os.listdir(path=self.__class__.directory):
        os.remove(path=os.path.join(self.__class__.directory, filename))
        assert filename.startswith('events.out.tfevents.')
        break
    os.rmdir(path=self.__class__.directory)
    self.finished_test()
"b": -5.0e2, "D": 0.0e-2, "x0": None } # Path to save the figure #fig_path = 'figures/RLControl_De_0_Dt_0_dta_0p05_ep50_dense_12_gre_1_wne_1_grt_1_wnt_1_r_ma1em1.png' fig_path = None # Create instance of complex Stuart-Landau equation environment environment = Langevin2D_Env() environment.env_params = env_params # Load agent agent = Agent.load( directory= "./agents/saver_data_D_0_dta_0p05_maxa_1_ep100_lstm2_6_gr_1_wn_1_r_ma1em2", filename='agent') ############################################################################### # EVALUTATIONS ############################################################################### # Set up control time with reference to simulation time dt_action = 0.05 dt = environment.env_params["dt"] T = environment.env_params["T"] n_env_steps = int(dt_action / dt) n_actions = int(T / dt / n_env_steps) # Initiate environment to initial state time = np.zeros((environment.max_episode_timesteps()))
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--theory", help="select an agent type [ppo, vpg, dqn]")
args = parser.parse_args()

# Create an OpenAI-Gym environment
environment = Environment.create(
    environment='cenv.CustomEnvironment', max_episode_timesteps=100)

# NOTE(review): agent_directory is built from --theory but is not used in
# this fragment — presumably consumed later or dead code; confirm.
agent_directory = './' + args.theory + '_agent'

# Load the Agents Previously Saved (numpy-format checkpoints); the
# utilitarian agent is selected as the active one to start.
util_agent = Agent.load(directory='./util_agent', format='numpy', environment=environment)
deon_agent = Agent.load(directory='./deon_agent', format='numpy', environment=environment)
agent = util_agent

# Set up the Environment
environment.reset()

# Interactive loop: show each dilemma and wait for the human to press Enter.
while True:
    # Get the Current Dilemma for Output and wait for human input
    dilemma = environment.getCurrentDilemma()
    input("\n ***Press Enter For Next Dilemma*** \n")
def test(flags):
    """Run evaluation episodes of a saved PPO driving agent in the simulator.

    Sets up the ROS node, publishers/subscribers, and environment, loads the
    agent from flags.load (required), then runs flags.steps (required)
    episodes, publishing the steering-probability tensor each step.

    Args:
        flags: parsed CLI flags; uses .steps (episode count, required),
            .verbose (optional), and .load ("dir" or "dir/filename", required).
    """
    rospy.init_node("rl_algorithm", anonymous=True)
    main_state = State()
    config_file = open('configs/config.json')
    main_state.configs = json.load(config_file)
    config_file.close()
    #Publishers
    drive_announce = rospy.Publisher(main_state.configs['CONTROL_TOPIC'],
                                     AckermannDriveStamped, queue_size=1)
    reset_announce = rospy.Publisher(main_state.configs['RESET_TOPIC'],
                                     Bool, queue_size=1)
    main_state.st_display_pub = rospy.Publisher(
        main_state.configs['ST_DISPLAY_TOPIC'], Marker, queue_size=10)
    # Subscribers feed sensor data into main_state via the parser callbacks.
    laser_listen = rospy.Subscriber(main_state.configs['LASER_TOPIC'], LaserScan,
                                    parser.laser_parser, main_state, queue_size=1)
    odom_listen = rospy.Subscriber(main_state.configs['ODOM_TOPIC'], Odometry,
                                   parser.odom_parser, main_state, queue_size=1)
    #Flags for testing
    if (flags.steps):
        test_steps = flags.steps
    else:
        print("The number of steps must be specified")
        return
    if (flags.verbose):
        main_state.verbose = True
    environment = PD_Environment(reset_announce, drive_announce, main_state)
    environment.publish_markers()
    #Initialize agent
    #TODO: Consolidate into configs
    # A fresh agent spec is created first; Agent.load below restores its
    # weights from the checkpoint.
    agent = Agent.create(agent="ppo", network=custom_network(), batch_size=10,
                         parallel_interactions=8, environment=environment,
                         max_episode_timesteps=2000, tracking="all")
    if (flags.load):
        # flags.load may be "directory" or "directory/filename".
        files = flags.load.split('/')
        if (len(files) > 1):
            agent = Agent.load(directory=files[0], filename=files[1],
                               environment=environment, agent=agent)
        else:
            agent = Agent.load(directory=flags.load, environment=environment,
                               agent=agent)
    else:
        print("A load file must be specified")
        return
    #Define the tracking tensor names
    ST_TENSOR = 'agent/policy/turning_angle_distribution/probabilities'
    for i in range(test_steps):
        num_steps = 0
        environment.reset()
        states = parser.assemble_state(main_state)
        done = False
        main_state.crash_det = False
        main_state.lap_finish = False
        # One evaluation episode, capped at 2000 steps.
        while not done and num_steps < 2000:
            num_steps += 1
            # independent=True: evaluation-mode acting, no experience stored.
            actions = agent.act(states, independent=True)
            all_probs = agent.tracked_tensors()
            parser.publish_steering_prob(all_probs[ST_TENSOR],
                                         main_state.st_display_pub,
                                         main_state.cur_steer)
            states, done, reward = environment.execute(actions=actions)
            col_detect(main_state)
            # Ignore terminations within the first 10 steps — presumably to
            # skip spurious resets right after environment.reset(); confirm.
            if (num_steps < 10):
                done = False
        if (main_state.crash_det):
            print("Crashed")
        if (main_state.lap_finish):
            print("Lap finished")
        print("Episode {} done after {}".format(i, num_steps))
def train(flags):
    """Train a PPO driving agent across 8 parallel gym environments.

    Reads config.json, applies CLI flag overrides, optionally restores a
    previous checkpoint, then runs a multiprocessing Runner, saving a
    checkpoint every SAVE_RUNS episodes.

    Args:
        flags: parsed CLI flags; uses .steps, .save, .verbose, .lap_time,
            .entropy, .ds_reward, and .load ("dir" or "dir/filename").
    """
    main_state = State()
    #Load config
    config_file = open('configs/config.json')
    main_state.configs = json.load(config_file)
    config_file.close()
    #Accept flag params — each flag falls back to the config value.
    if (flags.steps):
        train_steps = flags.steps
    else:
        train_steps = main_state.configs["NUM_RUNS_TOT"]
    if (flags.save):
        save_file = flags.save
    else:
        save_file = main_state.configs["MODEL_DIR"]
    if (flags.verbose):
        main_state.verbose = True
    if (not flags.lap_time):
        main_state.default_reward = 0.01
    if (flags.entropy):
        main_state.entropy_reg = flags.entropy
    else:
        main_state.entropy_reg = main_state.configs["DEF_ENTROPY"]
    if (flags.ds_reward):
        main_state.ds_reward = True
    else:
        main_state.ds_reward = False
    # One environment per parallel worker.
    environments = list()
    for i in range(8):
        environments.append(Gym_Environment(main_state))
    #Initialize the agent
    agent = Agent.create(agent = "ppo", network = custom_network(),
                         environment = environments[0], max_episode_timesteps=2000,
                         parallel_interactions = 8,
                         learning_rate = 0.002, summarizer = main_state.configs["SUM_DIR"],
                         batch_size = 10, entropy_regularization = main_state.entropy_reg)
    if (flags.load):
        # flags.load may be "directory" or "directory/filename".
        files = flags.load.split('/')
        if (len(files) > 1):
            agent = Agent.load(directory = files[0], filename = files[1],
                               max_episode_timesteps = 2000, learning_rate = 0.002,
                               summarizer = main_state.configs["SUM_DIR"],
                               batch_size = 10,
                               environment = environments[0], agent = agent)
        else:
            agent = Agent.load(directory = files[0], environment = environments[0],
                               max_episode_timesteps = 2000, learning_rate = 0.002,
                               summarizer = main_state.configs["SUM_DIR"],
                               batch_size = 10,
                               agent = agent)
    runner = Runner(agent = agent, environments = environments, num_parallel = 8,
                    remote = 'multiprocessing')
    if (train_steps <= main_state.configs["SAVE_RUNS"]):
        # Small run: single pass, save once at the end.
        runner.run(num_episodes=train_steps, batch_agent_calls=True)
        agent.save(save_file, format="checkpoint", append="episodes")
    else:
        # Long run: train in SAVE_RUNS-sized chunks, checkpointing after each.
        for i in range(
                int((train_steps - 1) / main_state.configs["SAVE_RUNS"]) + 1):
            runner.run(num_episodes=main_state.configs["SAVE_RUNS"],
                       batch_agent_calls=True)
            agent.save(save_file, format="checkpoint", append="episodes")
def train_GUI(flags):
    """Train the PPO driving agent against the ROS simulator (GUI variant).

    Sets up the ROS node, subscribers and publishers, applies CLI flag
    overrides over config.json, optionally restores a checkpoint, then runs
    training in SAVE_RUNS-sized chunks, saving after each chunk.

    Args:
        flags: parsed CLI flags; uses .steps, .save, .verbose, .lap_time,
            .entropy, .ds_reward, and .load ("dir" or "dir/filename").
    """
    #Initialize node
    rospy.init_node("rl_algorithm", anonymous=True)
    #Initialize subscribers for laser and odom
    main_state = State()
    #TODO: Put config file into flags
    #Load config
    config_file = open('configs/config.json')
    main_state.configs = json.load(config_file)
    config_file.close()
    #Subscribers and Publishers — callbacks write parsed data into main_state.
    laser_listen = rospy.Subscriber(main_state.configs['LASER_TOPIC'], LaserScan,
                                    parser.laser_parser, main_state, queue_size=1)
    odom_listen = rospy.Subscriber(main_state.configs['ODOM_TOPIC'], Odometry,
                                   parser.odom_parser, main_state, queue_size=1)
    info_listen = rospy.Subscriber(main_state.configs['INFO_TOPIC'], RaceInfo,
                                   parser.info_parser, main_state, queue_size=1)
    drive_announce = rospy.Publisher(main_state.configs['CONTROL_TOPIC'],
                                     AckermannDriveStamped, queue_size=1)
    reset_announce = rospy.Publisher(main_state.configs['RESET_TOPIC'],
                                     Bool, queue_size=1)
    main_state.st_display_pub = rospy.Publisher(
        main_state.configs['ST_DISPLAY_TOPIC'], Marker, queue_size=10)
    #Publish True to reset_announce to reset the simulator
    #Accept flag params — each flag falls back to the config value.
    if (flags.steps):
        train_steps = flags.steps
    else:
        train_steps = main_state.configs["NUM_RUNS_TOT"]
    if (flags.save):
        save_file = flags.save
    else:
        save_file = main_state.configs["MODEL_DIR"]
    if (flags.verbose):
        main_state.verbose = True
    if (not flags.lap_time):
        main_state.default_reward = 0.01
    if (flags.entropy):
        main_state.entropy_reg = flags.entropy
    else:
        main_state.entropy_reg = main_state.configs["DEF_ENTROPY"]
    if (flags.ds_reward):
        main_state.ds_reward = True
    else:
        main_state.ds_reward = False
    # Initialize environment
    # TODO: Define max_episode_timesteps from CONFIG file
    #environment = Environment.create(
    #    environment=PD_Environment, max_episode_timesteps=100
    #)
    environment = PD_Environment(reset_announce, drive_announce, main_state)
    environment.publish_markers()
    # Initialize Agent — a fresh spec; Agent.load below restores weights.
    agent = Agent.create(
        agent="ppo",
        network=custom_network(),
        batch_size=5,
        entropy_regularization=main_state.entropy_reg,
        environment=environment,
        max_episode_timesteps=2000,
        learning_rate=0.002,
        #tracking="all")
        tracking="all",
        summarizer=main_state.configs["SUM_DIR"])
    if (flags.load):
        # flags.load may be "directory" or "directory/filename".
        files = flags.load.split('/')
        if (len(files) > 1):
            agent = Agent.load(directory=files[0], filename=files[1],
                               environment=environment, agent=agent)
        else:
            agent = Agent.load(directory=flags.load, environment=environment,
                               agent=agent)
        print("Agent loaded from " + flags.load)
    #The agent network configuration could be printed with agent.get_architecture()
    # Run the save loop: train SAVE_RUNS episodes per chunk, checkpoint after
    # each chunk.
    for i in range(
            int((train_steps - 1) / main_state.configs["SAVE_RUNS"]) + 1):
        run(environment, agent, main_state, main_state.configs["SAVE_RUNS"],
            10000, False)
        agent.save(save_file, format="checkpoint", append="episodes")
import os
import socket
import numpy as np
import csv
from collections import deque

from tensorforce.agents import Agent

# Restore the trained agent from the saver_data folder under the current
# working directory.
saver_restore = os.getcwd() + "/saver_data/"
agent = Agent.load(directory = saver_restore)

# If folder does not exist, create it
if(not os.path.exists("frequency_response")):
    os.mkdir("frequency_response")

### System parameters ###
# Vortex shedding cycle
t_vs = 6.860
# Forcing sampling time
t_s = 1.0/100.0
# Action time of controller
t_a = 0.5

### Analysis parameters ###
# Length of the analysis window; units not shown in this fragment —
# presumably seconds, since 1/length gives the starting frequency. Confirm.
length = 100
# Frequency sweep from one cycle per window up to 10x the shedding frequency.
start_freq = 1/length
stop_freq = 10.0*(1/t_vs)
num_freqs = 50
def __init__(self, server_address, name, chosen_team_index=0, output_path=None, rounds_to_play=1, log=False, mode=Mode.TRAIN):
    """Set up the bot's bookkeeping state and its card/trumpf DQN agents.

    In TRAIN mode, existing checkpoints under ``output_path`` are resumed if
    present, otherwise fresh agents are created (with periodic checkpointing
    via each agent's ``saver`` config). In any other mode the agents are
    loaded from the checkpoint directories.

    Args:
        server_address: forwarded to the base class (game-server endpoint).
        name: forwarded to the base class (player name).
        chosen_team_index: forwarded to the base class.
        output_path: directory for checkpoints/summaries; required unless
            neither logging nor checkpoint paths are used — TODO confirm
            it may ever legitimately be None.
        rounds_to_play: forwarded to the base class.
        log: when True, enables game logging via ``self.log_game``.
        mode: Mode.TRAIN to train (create/resume agents), otherwise load
            pre-trained agents for play only.
    """
    super(Bot, self).__init__(server_address, name, chosen_team_index, rounds_to_play)
    self.mode = mode
    self.episode = 1
    self.stich_number = 0
    self.played_cards_in_game = []
    self.rejected_cards = []
    self.chose_trumpf = False
    # np.float64 replaces the deprecated np.float alias (removed in
    # NumPy 1.24); both name the same 64-bit float dtype, so behavior
    # is unchanged on older NumPy versions.
    self.out_of_color = np.zeros((3, 4), dtype=np.float64)
    self.avg_stich_reward = 0
    self.rejected_per_session = 0
    self.avg_rejected_per_session = 0
    # Uniform prior over the 5 trumpf choices.
    self.avg_trumpf_selection = np.ones(5, dtype=np.float64) / 5
    self.avg_trumpf_points = np.zeros(5, dtype=np.float64)
    self.avg_game_points = np.zeros(2, dtype=np.float64)
    if log:
        self.log_game(output_path)
    model_path = os.path.join(output_path, 'checkpoints')
    trumpf_path = os.path.join(output_path, 'trumpf-checkpoints')
    if mode is Mode.TRAIN:
        os.makedirs(output_path, exist_ok=True)
        if os.path.exists(model_path):
            # Resume training from the existing checkpoints.
            self.agent = Agent.load(model_path)
            self.trumpf_agent = Agent.load(trumpf_path)
        else:
            # Card-play agent: two-branch network (cards conv stack +
            # feature dense stack) concatenated into a shared head.
            self.agent = Agent.create(
                agent='dqn',
                states=get_states(),
                actions=get_actions(),
                max_episode_timesteps=50,
                memory=50000,
                batch_size=32,
                target_sync_frequency=10,
                start_updating=10000,
                exploration=dict(
                    type='decaying', decay='exponential', unit='episodes',
                    num_steps=100000, initial_value=0.2, decay_rate=0.5),
                learning_rate=dict(
                    type='decaying', decay='exponential', unit='episodes',
                    num_steps=100000, initial_value=0.001, decay_rate=0.75),
                variable_noise=dict(
                    type='decaying', decay='exponential', unit='episodes',
                    num_steps=100000, initial_value=0.1, decay_rate=0.75),
                network=[
                    [dict(type='retrieve', tensors=['cards']),
                     dict(type='conv1d', size=512, window=9, stride=9, padding='valid'),
                     dict(type='conv1d', size=256, window=1, stride=1, padding='valid'),
                     dict(type='flatten'),
                     dict(type='register', tensor='cards-embedding')],
                    [dict(type='retrieve', tensors=['features']),
                     dict(type='dense', size=64, activation='relu'),
                     dict(type='register', tensor='features-embedding')],
                    [dict(type='retrieve',
                          aggregation='concat',
                          tensors=['cards-embedding', 'features-embedding']),
                     dict(type='dense', size=512, activation='relu'),
                     dict(type='dense', size=256, activation='relu'),
                     dict(type='dense', size=256, activation='relu')]
                ],
                discount=1.0,
                summarizer=dict(
                    directory=os.path.join(output_path, "summary"),
                    labels=['entropy', 'kl-divergence', 'loss', 'reward', 'update-norm']
                ),
                saver=dict(
                    directory=model_path,
                    frequency=SAVE_EPISODES  # save checkpoint every 100 updates
                )
            )
            # Trumpf-selection agent: much smaller problem (2 timesteps),
            # hence the smaller memory/network and faster decay schedules.
            self.trumpf_agent = Agent.create(
                agent='dqn',
                states=get_trumpf_states(),
                actions=get_trumpf_actions(),
                max_episode_timesteps=2,
                memory=2000,
                batch_size=32,
                target_sync_frequency=10,
                start_updating=200,
                exploration=dict(
                    type='decaying', decay='exponential', unit='episodes',
                    num_steps=30000, initial_value=0.2, decay_rate=0.5),
                learning_rate=dict(
                    type='decaying', decay='exponential', unit='episodes',
                    num_steps=30000, initial_value=0.001, decay_rate=0.75),
                variable_noise=dict(
                    type='decaying', decay='exponential', unit='episodes',
                    num_steps=100000, initial_value=0.1, decay_rate=0.75),
                network=[
                    [dict(type='retrieve', tensors=['cards']),
                     dict(type='conv1d', size=128, window=1, stride=1, padding='valid'),
                     dict(type='conv1d', size=64, window=1, stride=1, padding='valid'),
                     dict(type='flatten'),
                     dict(type='dense', size=128, activation='relu'),
                     dict(type='dense', size=64, activation='relu')]
                ],
                discount=1.0,
                summarizer=dict(
                    directory=os.path.join(output_path, "summary/trumpf"),
                    labels=['entropy', 'kl-divergence', 'loss', 'reward', 'update-norm']
                ),
                saver=dict(
                    directory=trumpf_path,
                    frequency=2
                )
            )
    else:
        # Evaluation/play mode: always load the trained agents.
        self.agent = Agent.load(model_path)
        self.trumpf_agent = Agent.load(trumpf_path)
def test_explicit_extended(self):
    """Extended explicit save/load round-trips: custom filename and no-timestep save."""
    self.start_tests(name='explicit extended')

    # --- filename: save under the custom basename 'test', reload it by name ---
    agent, environment = self.prepare()
    agent.initialize()
    states = environment.reset()
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.save(directory=self.__class__.directory, filename='test')
    agent.close()
    # Reload the checkpoint and verify the restored agent can keep interacting.
    agent = Agent.load(directory=self.__class__.directory, filename='test')
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    environment.close()
    # Remove every file save() produced ('test-1' suffix = timestep 1) so the
    # directory is empty and rmdir succeeds.
    os.remove(path=os.path.join(self.__class__.directory, 'test.json'))
    os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-1.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-1.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'test-1.meta'))
    os.rmdir(path=self.__class__.directory)
    self.finished_test()

    # --- no timestep: save without appending the timestep to the filenames ---
    agent, environment = self.prepare()
    agent.initialize()
    states = environment.reset()
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.save(directory=self.__class__.directory, append_timestep=False)
    agent.close()
    agent = Agent.load(directory=self.__class__.directory)
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)
    agent.close()
    environment.close()
    # Checkpoint files carry no '-<timestep>' suffix in this variant.
    os.remove(path=os.path.join(self.__class__.directory, 'agent.json'))
    os.remove(path=os.path.join(self.__class__.directory, 'checkpoint'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent.data-00000-of-00001'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent.index'))
    os.remove(path=os.path.join(self.__class__.directory, 'agent.meta'))
    os.rmdir(path=self.__class__.directory)
    self.finished_test()
check_dir = directory + '/checkpoints/' print('\nTesting {dir}'.format(dir=directory)) old_epochs = 27 agent = Agent.load( directory=check_dir, filename='agent-{oe}'.format(oe=old_epochs - 1), format='hdf5', environment=environment, agent='ppo', network=[ dict(type='lstm', size=lstm_units, horizon=lstm_horizon, activation='relu'), ], baseline=[ dict(type='lstm', size=lstm_units, horizon=lstm_horizon, activation='relu') ], baseline_optimizer=dict(optimizer='adam', learning_rate=baseline_lr), learning_rate=policy_lr, batch_size=batch_size, tracking=['distribution'], discount=discount, states=dict(features=dict(type=float, shape=(147, )), ), actions=dict(type=int, num_values=num_actions + num_classes)) # Parameters for test loop episode = 0 correct = 0
update_frequency=steps_per_episode) runner = Runner(agent=agent, environment=env) runner.run(num_episodes=num_episodes, evaluation=False) agent.save(directory='minigrid_checkpoints/{env}/'.format(env=env_name), filename='model-{ep}-{env}'.format(ep=num_episodes, env=env_name)) ########### TEST with visualization ############# print('Testing agent') if env_name == 'MiniGrid-DistShift1-v0': env_name = 'MiniGrid-DistShift2-v0' env = wrappers.gym.make(env_name) env = wrappers.ImgObsWrapper(env) num_actions = env.action_space.n env = Environment.create(environment=env, max_episode_timesteps=steps_per_episode, states=dict(type='float', shape=obs_shape), actions=dict(type='int', num_values=num_actions), visualize=True) # Agent creation agent = Agent.load( directory='minigrid_checkpoints/{env}/'.format(env=env_name), filename='model-{ep}-{env}-1.data-00000-of-00001'.format( ep=num_episodes, env=env_name), environment=env) runner = Runner(agent=agent, environment=env) runner.run(num_episodes=50, evaluation=True)
agent = Agent.load( directory=directory, filename='agent-{x}'.format(x=old_episodes), format='hdf5', environment=environment, agent='ppo', network=[ # First module: shared dense block [ dict(type='dense', size=64, activation='relu'), dict(type='dense', size=64, activation='relu'), dict(type='dense', size=64, activation='relu'), dict(type='lstm', size=64, horizon=steps_per_episode, activation='relu'), ], ], baseline=[ dict(type='dense', size=64, activation='relu'), dict(type='dense', size=64, activation='relu') ], baseline_optimizer=dict(optimizer='adam', learning_rate=baseline_lr), learning_rate=policy_lr, batch_size=10, tracking=['distribution'], discount=0.99, states=dict( # 64 features + 3 positional coding features=dict(type=float, shape=(67, )), ), actions=dict(movement=dict(type=int, num_values=num_actions), classification=dict(type=int, num_values=len(class_names))), entropy_regularization=0.01, # exploration=0.1 )
# max_episode_timesteps=1e6 ) runner.run(num_episodes=num_episodes) t_ag.save(directory='model-numpy', format='numpy', append='episodes') # Close agent separately, since created separately #runner.run(num_episodes=100, evaluation=True) t_ag.close() t_env.close() runner.close() #%% plot results df = dl_utils.create_df_of_outputs(test_env.swmm_env, route_step) fig_name = '3_using_runner_' + str(num_episodes) + '_episodes' dl_utils.plt_key_states(fig_name, df, test_env.swmm_env) #%% testing #load trained model train_env = dl_utils.custom_tensorflow_env(model_name='theta_test', threshold=threshold, scaling=scaling) route_step = test_env.swmm_env.env.sim._model.getSimAnalysisSetting( tkai.SimulationParameters.RouteStep.value) tr_env = Environment.create(environment=train_env) t_ag = Agent.load(directory='model-numpy', format='numpy', environment=environment)