def _initialize_episode(self):
  """Initialization for a new episode.

  Returns:
    A list with the initial action chosen by the agent for each channel.
  """
  self.length_rnd = np.random.choice(self.length)
  self.theta_rnd = np.random.choice(self.theta)
  if self.reward_pipe[0] is None:
    self.channel_pipe = open_write_pipe(channel_name)
    # Send each environment its channel number.
    write_to_pipe([self.channel_pipe] * n, list(range(n)))
    self.complete_pipe = open_write_pipe("/tmp/complete_down.pipe")
    write_to_pipe(self.complete_pipe, 1)
    self.goal_pipe = open_write_pipe(goal_path)
    write_to_pipe(
        self.goal_pipe,
        [round(self.length_rnd, 4), round(self.theta_rnd, 4)])
    self.action_pipe = open_write_pipe(action_name_list)
    self.reset_pipe = open_write_pipe(reset_name_list)
    self.obs_pipe = open_read_pipe(obs_name_list)
    self.touch_pipe = open_read_pipe(touch_name_list)
    self.reward_pipe = open_read_pipe(reward_name_list)
    self.over_pipe = open_read_pipe(over_name_list)
    self.terminal_pipe = open_read_pipe(terminal_name_list)

  # initial_observation = [json.loads(os.read(obs_pipe, 50000))
  #                        for obs_pipe in self.obs_pipe]
  '''
  initial_observation = [read_from_pipe(obs_pipe) for obs_pipe in self.obs_pipe]
  # print("\ncall initial step")
  # list to np.array
  initial_observation = [np.asarray(observation)
                         for observation in initial_observation]
  '''
  initial_observation = [np.zeros([100, 100, 3])]
  initial_state = [
      np.array(read_from_pipe(state_pipe)) for state_pipe in self.touch_pipe
  ]
  # print(initial_state)
  return [
      self._agent.begin_episode(observation, state)
      for observation, state in zip(initial_observation, initial_state)
  ]
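# NOTE: the pipe helpers used above (make_pipe, open_write_pipe, open_read_pipe,
# write_to_pipe, read_from_pipe, close_pipe) are defined elsewhere in the repo
# and are not shown in this section. The sketch below is only an assumption of
# how they might look, inferred from the commented-out os.read/json.loads calls
# above: JSON payloads exchanged over named FIFOs, with each helper accepting
# either a single path/fd or a list of them.
import json
import os


def make_pipe(path):
  """Create the named FIFO(s) if they do not already exist (assumed helper)."""
  for p in (path if isinstance(path, list) else [path]):
    if not os.path.exists(p):
      os.mkfifo(p)


def open_write_pipe(path):
  """Open FIFO(s) for writing; blocks until a reader connects (assumed helper)."""
  if isinstance(path, list):
    return [os.open(p, os.O_WRONLY) for p in path]
  return os.open(path, os.O_WRONLY)


def open_read_pipe(path):
  """Open FIFO(s) for reading (assumed helper)."""
  if isinstance(path, list):
    return [os.open(p, os.O_RDONLY) for p in path]
  return os.open(path, os.O_RDONLY)


def write_to_pipe(pipe, data):
  """JSON-encode data and write it; pipe/data may be parallel lists (assumed)."""
  if isinstance(pipe, list):
    for p, d in zip(pipe, data):
      os.write(p, json.dumps(d).encode())
  else:
    os.write(pipe, json.dumps(data).encode())


def read_from_pipe(pipe, size=50000):
  """Read up to `size` bytes from a FIFO and JSON-decode them (assumed)."""
  return json.loads(os.read(pipe, size).decode())


def close_pipe(pipe):
  """Close a single fd or a list of fds (assumed helper)."""
  for p in (pipe if isinstance(pipe, list) else [pipe]):
    os.close(p)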
output("make pipe channel, space, goal")
space_pipe = open_write_pipe(space_name)
output("open read pipe space")
channel_pipe = open_read_pipe(channel_name)
output("open read pipe channel")
channel = read_from_pipe(channel_pipe, 1)
output("read from pipe channel: {}".format(channel))
print(action_num())

if channel == 0:
  complete_pipe = open_read_pipe("/tmp/complete.pipe")
  complete = read_from_pipe(complete_pipe, 1)
  if not complete:
    print("write space")
    write_to_pipe(space_pipe, action_space_info())
  # os.close(complete_pipe)
  close_pipe(complete_pipe)

print("I AM CHANNEL %s" % channel)

'''evaluation use'''
goal_pipe = open_read_pipe(goal_name)
goal_info = read_from_pipe(goal_pipe)
close_pipe(goal_pipe)
env.environment.set_goal_position(goal_info)
'''.................................'''

# head + tail name pipe
read_name_list = [(i + "%s.pipe" % (channel + 1)) for i in read_name]
write_name_list = [(i + "%s.pipe" % (channel + 1)) for i in write_name]
all_path = read_name_list + write_name_list
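# The per-step exchange on the environment (Webots controller) side is not
# shown in this section. The function below is a hedged sketch of how a
# controller might answer the Runner's per-step protocol once the per-channel
# pipes have been created and opened (as in the setup further below). The
# `env.step(action)` call returning (state, reward, terminal, gameover) is an
# assumed API, used here only for illustration.
def run_controller_loop(env, action_pipe, touch_pipe, reward_pipe,
                        over_pipe, terminal_pipe, reset_pipe):
  """Hypothetical controller main loop mirroring the Runner's step order."""
  while True:
    action = read_from_pipe(action_pipe)                   # blocks until the agent acts
    state, reward, terminal, gameover = env.step(action)   # assumed env API
    write_to_pipe(touch_pipe, state)
    write_to_pipe(reward_pipe, reward)
    write_to_pipe(terminal_pipe, terminal)
    write_to_pipe(over_pipe, gameover)
    reset = read_from_pipe(reset_pipe)
    if reset:
      break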
def _run_one_episode(self):
  """Executes a full trajectory of the agent interacting with the environment.

  Returns:
    The number of steps taken and the total reward.
  """
  step_number = 0
  total_reward = 0.
  action = self._initialize_episode()
  is_terminal = False
  n_list = list(range(n))

  # Keep interacting until every channel reaches a terminal state.
  while True:
    step_number += 1
    # Iterate over a copy so finished channels can be removed from n_list
    # without skipping entries.
    for i in list(n_list):
      observation, state, reward, is_terminal = self._run_one_step(
          i, action[i].tolist())
      total_reward += reward
      # Perform reward clipping.
      # reward = np.clip(reward, -1, 1)
      gameover = read_from_pipe(self.over_pipe[i])
      if gameover or step_number == self._max_steps_per_episode:
        write_to_pipe(self.reset_pipe[i], True)
        close_pipe([
            self.action_pipe[i], self.reset_pipe[i], self.obs_pipe[i],
            self.touch_pipe[i], self.reward_pipe[i], self.over_pipe[i],
            self.terminal_pipe[i]
        ])
        self.reward_pipe[i] = None
        print("GAME_OVER")
        self._end_episode(reward)
        n_list.remove(i)
        time.sleep(0.032)
        continue
      # seems not to go into elif condition
      elif is_terminal:
        print("TERMINAL")
        self._agent.end_episode(reward)
        action[i] = self._agent.begin_episode(observation, state)
      else:
        write_to_pipe(self.reset_pipe[i], gameover)
        action[i] = self._agent.step(reward, observation, state)
    if len(n_list) == 0:
      close_pipe(self.channel_pipe)
      close_pipe(self.complete_pipe)
      close_pipe(self.goal_pipe)
      break

  # self._end_episode(reward)
  return step_number, total_reward
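# _run_one_step is called above but not shown in this section. The sketch
# below is an assumption of the per-channel exchange it performs, inferred
# from its call site (it must return observation, state, reward, is_terminal)
# and from the zero-image placeholder used in _initialize_episode; the exact
# read order is not confirmed by the source.
def _run_one_step(self, i, action):
  """Hypothetical per-step pipe exchange for channel i (assumed shape)."""
  write_to_pipe(self.action_pipe[i], action)
  # observation = np.asarray(read_from_pipe(self.obs_pipe[i]))  # if frames are streamed
  observation = np.zeros([100, 100, 3])  # placeholder, as in _initialize_episode
  state = np.asarray(read_from_pipe(self.touch_pipe[i]))
  reward = read_from_pipe(self.reward_pipe[i])
  is_terminal = read_from_pipe(self.terminal_pipe[i])
  return observation, state, reward, is_terminal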
def __init__(self,
             base_dir,
             create_agent_fn,
             create_environment_fn=create_webots_environment,
             sticky_actions=True,
             checkpoint_file_prefix='ckpt',
             logging_file_prefix='log',
             log_every_n=1,
             num_iterations=10,
             training_steps=1000,
             evaluation_steps=500,
             max_steps_per_episode=2700,
             seed=123):
  """Initialize the Runner object in charge of running a full experiment.

  Args:
    base_dir: str, the base directory to host all required sub-directories.
    create_agent_fn: A function that takes as args a Tensorflow session and
      an environment, and returns an agent.
    create_environment_fn: A function which receives the sticky_actions flag
      and creates a Webots environment.
    sticky_actions: bool, whether to enable sticky actions in the
      environment.
    checkpoint_file_prefix: str, the prefix to use for checkpoint files.
    logging_file_prefix: str, prefix to use for the log files.
    log_every_n: int, the frequency for writing logs.
    num_iterations: int, the iteration number threshold (must be greater
      than start_iteration).
    training_steps: int, the number of training steps to perform.
    evaluation_steps: int, the number of evaluation steps to perform.
    max_steps_per_episode: int, maximum number of steps after which an
      episode terminates.
    seed: int, the seed used for the TensorFlow and NumPy RNGs.

  This constructor will take the following actions:
  - Initialize an environment.
  - Initialize a `tf.Session`.
  - Initialize a logger.
  - Initialize an agent.
  - Reload from the latest checkpoint, if available, and initialize the
    Checkpointer object.
  """
  assert base_dir is not None
  self._logging_file_prefix = logging_file_prefix
  self._log_every_n = log_every_n
  self._num_iterations = num_iterations
  self._training_steps = training_steps
  self._evaluation_steps = evaluation_steps
  self._max_steps_per_episode = max_steps_per_episode
  self._base_dir = base_dir
  self._create_directories()
  self._summary_writer = tf.compat.v1.summary.FileWriter(self._base_dir)

  tf.compat.v1.random.set_random_seed(seed)
  np.random.seed(seed)
  self._environment = create_environment_fn(sticky_actions)
  # Set up a session and initialize variables.
  self.gpu_options = tf.compat.v1.GPUOptions(
      per_process_gpu_memory_fraction=0.2)
  self._sess = tf.compat.v1.Session(
      '',
      config=tf.compat.v1.ConfigProto(
          allow_soft_placement=True, gpu_options=self.gpu_options))

  # Hand every environment its channel number before the agent is built.
  self.channel_pipe = open_write_pipe(channel_name)
  write_to_pipe([self.channel_pipe] * n, list(range(n)))
  print("write to pipe channel: {}".format(list(range(n))))
  self.complete_pipe = open_write_pipe("/tmp/complete.pipe")
  write_to_pipe(self.complete_pipe, 0)

  self._agent = create_agent_fn(self._sess, self._environment,
                                summary_writer=self._summary_writer)
  close_pipe(self.channel_pipe)
  close_pipe(self.complete_pipe)

  self._summary_writer.add_graph(graph=tf.compat.v1.get_default_graph())
  self._sess.run(tf.compat.v1.global_variables_initializer())
  self._sess.run(self._agent._sync_qt_ops)
  self._initialize_checkpointer_and_maybe_resume(checkpoint_file_prefix)

  self.reward_pipe = [None] * n
  self.length = np.linspace(0.53, 0.7, 4)
  self.theta = np.linspace(0.3, np.pi / 2, 15)
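# Usage sketch (an assumption, not part of the original module): the Runner
# class this __init__ belongs to can be driven through _run_one_episode once a
# create_agent_fn matching the documented (session, environment,
# summary_writer) signature is supplied, e.g.:
#
#   runner = Runner(base_dir='/tmp/webots_experiment',   # hypothetical path
#                   create_agent_fn=my_create_agent_fn)  # hypothetical factory
#   steps, episode_return = runner._run_one_episode()
#   print("steps: {}, return: {}".format(steps, episode_return))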
channel_name = "/tmp/channel_in1.pipe"
space_path = "/tmp/space_out1.pipe"
goal_path = "/tmp/goal_in1.pipe"
action_path = "/tmp/action_in1.pipe"
obs_path = "/tmp/obs_out1.pipe"
touch_path = "/tmp/touch_out1.pipe"
reward_path = "/tmp/reward_out1.pipe"
over_path = "/tmp/over_out1.pipe"
terminal_path = "/tmp/term_out1.pipe"
reset_path = "/tmp/reset_in1.pipe"

write_name_list = [action_path, reset_path]
read_name_list = [obs_path, touch_path, reward_path, over_path, terminal_path]

channel_pipe = open_write_pipe(channel_name)
write_to_pipe(channel_pipe, 0)
complete_pipe = open_write_pipe("/tmp/complete.pipe")
write_to_pipe(complete_pipe, 0)
goal_pipe = open_write_pipe(goal_path)

agent_name = 'ddpg'
debug_mode = False


def create_agent(sess, summary_writer=None):
  # s = os.open(space_path, os.O_RDONLY)
  s = open_read_pipe(space_path)
  # space = json.loads(os.read(s, 1024).decode())
  space = read_from_pipe(s)
  close_pipe([channel_pipe, complete_pipe])
make_pipe(channel_name)
make_pipe(space_name)
make_pipe('/tmp/complete.pipe')

space_pipe = open_write_pipe(space_name)
channel_pipe = open_read_pipe(channel_name)
channel = read_from_pipe(channel_pipe, 1)
print(action_num())

if channel == 0:
  complete_pipe = open_read_pipe("/tmp/complete.pipe")
  complete = read_from_pipe(complete_pipe, 1)
  if not complete:
    print("write space")
    write_to_pipe(space_pipe, action_space_info())
  # os.close(complete_pipe)
  close_pipe(complete_pipe)

print("I AM CHANNEL %s" % channel)

# head + tail name pipe
read_name_list = [(i + "%s.pipe" % channel) for i in read_name]
write_name_list = [(i + "%s.pipe" % channel) for i in write_name]
all_path = read_name_list + write_name_list
print(all_path)
make_pipe(all_path)

obs_pipe, touch_pipe, reward_pipe, over_pipe, terminal_pipe = open_write_pipe(
    write_name_list)
action_pipe, reset_pipe = open_read_pipe(read_name_list)
'''