def _initialize_episode(self):
  """Initialization for a new episode.

  Returns:
    action: int, the initial action chosen by the agent.
  """
  # Sample a random goal (length and angle) for this episode.
  self.length_rnd = np.random.choice(self.length)
  self.theta_rnd = np.random.choice(self.theta)

  # The pipes are opened lazily on the first episode only.
  if self.reward_pipe[0] is None:
    # Send the channel number to each slave.
    self.channel_pipe = open_write_pipe(channel_name)
    write_to_pipe([self.channel_pipe] * n, list(range(n)))
    self.complete_pipe = open_write_pipe("/tmp/complete.pipe")
    write_to_pipe(self.complete_pipe, 1)
    self.goal_pipe = open_write_pipe(goal_path)
    write_to_pipe(
        self.goal_pipe,
        [round(self.length_rnd, 4), round(self.theta_rnd, 4)])
    self.action_pipe = open_write_pipe(action_name_list)
    self.reset_pipe = open_write_pipe(reset_name_list)
    self.obs_pipe = open_read_pipe(obs_name_list)
    self.touch_pipe = open_read_pipe(touch_name_list)
    self.reward_pipe = open_read_pipe(reward_name_list)
    self.over_pipe = open_read_pipe(over_name_list)
    self.terminal_pipe = open_read_pipe(terminal_name_list)

  # The camera observation is currently replaced by a zero placeholder;
  # the original pipe read is kept for reference:
  # initial_observation = [
  #     np.asarray(read_from_pipe(obs_pipe)) for obs_pipe in self.obs_pipe]
  initial_observation = [np.zeros([100, 100, 3])]
  initial_state = [
      np.array(read_from_pipe(state_pipe)) for state_pipe in self.touch_pipe
  ]
  return [
      self._agent.begin_episode(observation, state)
      for observation, state in zip(initial_observation, initial_state)
  ]
obs_name = "/tmp/obs_out" touch_name = "/tmp/touch_out" reward_name = "/tmp/reward_out" over_name = "/tmp/over_out" terminal_name = "/tmp/term_out" reset_name = "/tmp/reset_in" read_name = [action_name, reset_name] write_name = [obs_name, touch_name, reward_name, over_name, terminal_name] output("ready to make pipe") make_pipe(channel_name) make_pipe(space_name) make_pipe(goal_name) make_pipe('/tmp/complete.pipe') output("make pipe channel, space, goal") space_pipe = open_write_pipe(space_name) output("open read pipe space") channel_pipe = open_read_pipe(channel_name) output("open read pipe channel") channel = read_from_pipe(channel_pipe, 1) output("read from pipe channel: {}".format(channel)) print(action_num()) if channel == 0: complete_pipe = open_read_pipe("/tmp/complete.pipe") complete = read_from_pipe(complete_pipe, 1) if not complete: print("write space") write_to_pipe(space_pipe, action_space_info()) # os.close(complete_pipe) close_pipe(complete_pipe)
def __init__(self,
             base_dir,
             create_agent_fn,
             create_environment_fn=create_webots_environment,
             sticky_actions=True,
             checkpoint_file_prefix='ckpt',
             logging_file_prefix='log',
             log_every_n=1,
             num_iterations=10,
             training_steps=1000,
             evaluation_steps=500,
             max_steps_per_episode=2700,
             seed=123):
  """Initialize the Runner object in charge of running a full experiment.

  Args:
    base_dir: str, the base directory to host all required sub-directories.
    create_agent_fn: A function that takes as args a Tensorflow session and
      an environment, and returns an agent.
    create_environment_fn: A function which creates the Webots environment.
    sticky_actions: bool, whether to enable sticky actions in the
      environment.
    checkpoint_file_prefix: str, the prefix to use for checkpoint files.
    logging_file_prefix: str, prefix to use for the log files.
    log_every_n: int, the frequency for writing logs.
    num_iterations: int, the iteration number threshold (must be greater
      than start_iteration).
    training_steps: int, the number of training steps to perform.
    evaluation_steps: int, the number of evaluation steps to perform.
    max_steps_per_episode: int, maximum number of steps after which an
      episode terminates.
    seed: int, random seed used for TensorFlow and NumPy.

  This constructor will take the following actions:
    - Initialize an environment.
    - Initialize a `tf.Session`.
    - Initialize a logger.
    - Initialize an agent.
    - Reload from the latest checkpoint, if available, and initialize the
      Checkpointer object.
  """
  assert base_dir is not None
  self._logging_file_prefix = logging_file_prefix
  self._log_every_n = log_every_n
  self._num_iterations = num_iterations
  self._training_steps = training_steps
  self._evaluation_steps = evaluation_steps
  self._max_steps_per_episode = max_steps_per_episode
  self._base_dir = base_dir
  self._create_directories()
  self._summary_writer = tf.compat.v1.summary.FileWriter(self._base_dir)

  tf.compat.v1.random.set_random_seed(seed)
  np.random.seed(seed)
  self._environment = create_environment_fn(sticky_actions)

  # Set up a session and initialize variables.
  self.gpu_options = tf.compat.v1.GPUOptions(
      per_process_gpu_memory_fraction=0.2)
  self._sess = tf.compat.v1.Session(
      '',
      config=tf.compat.v1.ConfigProto(
          allow_soft_placement=True, gpu_options=self.gpu_options))

  # Hand each slave its channel index and signal that the agent is not
  # ready yet (complete == 0) before building it.
  self.channel_pipe = open_write_pipe(channel_name)
  write_to_pipe([self.channel_pipe] * n, list(range(n)))
  print("write to pipe channel: {}".format(list(range(n))))
  self.complete_pipe = open_write_pipe("/tmp/complete.pipe")
  write_to_pipe(self.complete_pipe, 0)

  self._agent = create_agent_fn(self._sess, self._environment,
                                summary_writer=self._summary_writer)
  close_pipe(self.channel_pipe)
  close_pipe(self.complete_pipe)

  self._summary_writer.add_graph(graph=tf.compat.v1.get_default_graph())
  self._sess.run(tf.compat.v1.global_variables_initializer())
  self._sess.run(self._agent._sync_qt_ops)

  self._initialize_checkpointer_and_maybe_resume(checkpoint_file_prefix)

  self.reward_pipe = [None] * n
  self.length = np.linspace(0.53, 0.7, 4)
  self.theta = np.linspace(0.3, np.pi / 2, 15)
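# Usage sketch (hypothetical, not from the source): the constructor only needs
# a log directory and an agent factory matching the create_agent_fn contract
# documented above; the channel/complete pipes are opened by the constructor
# itself. The returned agent must expose begin_episode(observation, state),
# which _initialize_episode uses to start an episode.
#
#   def my_create_agent(sess, environment, summary_writer=None):
#     ...  # build and return the agent
#
#   runner = Runner(base_dir='/tmp/webots_logs',
#                   create_agent_fn=my_create_agent,
#                   num_iterations=10,
#                   training_steps=1000)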
# Pipe names shared with every slave.
channel_name = "/tmp/channel_in1.pipe"
space_path = "/tmp/space_out1.pipe"
goal_path = "/tmp/goal_in1.pipe"
action_path = "/tmp/action_in1.pipe"
obs_path = "/tmp/obs_out1.pipe"
touch_path = "/tmp/touch_out1.pipe"
reward_path = "/tmp/reward_out1.pipe"
over_path = "/tmp/over_out1.pipe"
terminal_path = "/tmp/term_out1.pipe"
reset_path = "/tmp/reset_in1.pipe"

write_name_list = [action_path, reset_path]
read_name_list = [obs_path, touch_path, reward_path, over_path, terminal_path]

channel_pipe = open_write_pipe(channel_name)
write_to_pipe(channel_pipe, 0)
complete_pipe = open_write_pipe("/tmp/complete.pipe")
write_to_pipe(complete_pipe, 0)
goal_pipe = open_write_pipe(goal_path)

agent_name = 'ddpg'
debug_mode = False


def create_agent(sess, summary_writer=None):
  # Read the action-space description published on the space pipe.
  # (Previously: s = os.open(space_path, os.O_RDONLY);
  #  space = json.loads(os.read(s, 1024).decode()))
  s = open_read_pipe(space_path)
  space = read_from_pipe(s)