Example #1
    def _run_one_step(self, channel, action):
        """Executes a single step in the environment.

        Args:
          channel: int, index of the environment channel to step.
          action: int, the action to perform in the environment.

        Returns:
          The observation, state, reward, and is_terminal values returned
            from the environment.
        """
        write_to_pipe(self.action_pipe[channel], action)
        touch_data = read_from_pipe(self.touch_pipe[channel])
        reward = read_from_pipe(self.reward_pipe[channel])
        is_terminal = read_from_pipe(self.terminal_pipe[channel])

        # Reading the image observation is disabled; a zero placeholder is
        # returned instead:
        # observation = read_from_pipe(self.obs_pipe[channel])
        # observation = np.asarray(observation)
        observation = np.zeros([100, 100, 3])
        state = np.array(touch_data)
        return observation, state, reward, is_terminal
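Every example on this page leans on a set of pipe helpers that the snippets never define (make_pipe, open_read_pipe, open_write_pipe, read_from_pipe, write_to_pipe, close_pipe). A minimal sketch of what they might look like, inferred from the commented-out os.read/json.loads calls in the snippets; the JSON framing, the list handling, and the 50000-byte read cap are assumptions:

import json
import os

def make_pipe(name):
    # Create the FIFO if it does not already exist.
    if not os.path.exists(name):
        os.mkfifo(name)

def open_read_pipe(name):
    # Accept a single path or a list of paths, as the examples do.
    if isinstance(name, list):
        return [os.open(n, os.O_RDONLY) for n in name]
    return os.open(name, os.O_RDONLY)

def open_write_pipe(name):
    if isinstance(name, list):
        return [os.open(n, os.O_WRONLY) for n in name]
    return os.open(name, os.O_WRONLY)

def write_to_pipe(pipe, data):
    # JSON-encode each message; pairs up pipe/data lists elementwise.
    if isinstance(pipe, list):
        for p, d in zip(pipe, data):
            os.write(p, json.dumps(d).encode())
    else:
        os.write(pipe, json.dumps(data).encode())

def read_from_pipe(pipe, size=50000):
    # size caps a single read; the snippets pass 1 for one-digit messages.
    return json.loads(os.read(pipe, size).decode())

def close_pipe(pipe):
    if isinstance(pipe, list):
        for p in pipe:
            os.close(p)
    else:
        os.close(pipe)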
Example #2
def create_agent(sess, summary_writer=None):
    """Creates an RL agent from the action space announced over the space pipe."""
    s = open_read_pipe(space_path)
    space = read_from_pipe(s)
    os.close(s)
    close_pipe([channel_pipe, complete_pipe])
    if not debug_mode:
        summary_writer = None
    if agent_name == 'ddpg':
        return ddpg_agent_s.DDPGAgent(
            sess,
            action_space=spaces.Box(space[0],
                                    space[1],
                                    shape=space[2],
                                    dtype=np.float32),
            summary_writer=summary_writer)
    elif agent_name == 'dqn':
        # The space payload is [low, high, shape]; shape[0] is the action count.
        return dqn_agent.DQNAgent(sess,
                                  num_actions=space[2][0],
                                  summary_writer=summary_writer)
    elif agent_name == 'rainbow':
        return rainbow_agent.RainbowAgent(sess,
                                          num_actions=space[2][0],
                                          summary_writer=summary_writer)
    elif agent_name == 'implicit_quantile':
        return implicit_quantile_agent.ImplicitQuantileAgent(
            sess, num_actions=space[2][0], summary_writer=summary_writer)
    else:
        raise ValueError('Unknown agent: {}'.format(agent_name))
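Example #3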
def create_agent(sess, environment, summary_writer=None):
    """Creates a DQN agent.

  Args:
    sess: A `tf.Session` object for running associated ops.
    environment: An Atari 2600 Gym environment.
    summary_writer: A Tensorflow summary writer to pass to the agent
      for in-agent training statistics in Tensorboard.

  Returns:
    agent: An RL agent.

  Raises:
    ValueError: If `agent_name` is not in supported list.
  """
    print("enter")
    s = open_read_pipe(space_path)
    space = read_from_pipe(s)
    print("read space pipe")
    # print("space ", space)
    if not FLAGS.debug_mode:
        summary_writer = None
    if FLAGS.agent_name == 'ddpg':
        os.close(s)
        # return ddpg_agent.DDPGAgent(sess, action_space=spaces.Box(space[0], space[1], shape=space[2], dtype=np.float32),# num_actions=space,
        #                           summary_writer=summary_writer)
        return ddpg_agent_s.DDPGAgent(
            sess,
            action_space=spaces.Box(space[0],
                                    space[1],
                                    shape=space[2],
                                    dtype=np.float32),  # num_actions=space,
            summary_writer=summary_writer)
    elif FLAGS.agent_name == 'dqn':
        # return dqn_agent.DQNAgent(sess, num_actions=environment.action_space_n(),
        #                           summary_writer=summary_writer)
        os.close(s)
        return dqn_agent.DQNAgent(sess,
                                  num_actions=space[2][0],
                                  summary_writer=summary_writer)
    elif FLAGS.agent_name == 'rainbow':
        # return rainbow_agent.RainbowAgent(
        #     sess, num_actions=environment.action_space_n(),
        #     summary_writer=summary_writer)
        os.close(s)
        return rainbow_agent.RainbowAgent(sess,
                                          num_actions=space[2][0],
                                          summary_writer=summary_writer)
    elif FLAGS.agent_name == 'implicit_quantile':
        # return implicit_quantile_agent.ImplicitQuantileAgent(
        #     sess, num_actions=environment.action_space_n(),
        #     summary_writer=summary_writer)
        os.close(s)
        return implicit_quantile_agent.ImplicitQuantileAgent(
            sess, num_actions=space[2][0], summary_writer=summary_writer)
    else:
        os.close(s)
        raise ValueError('Unknown agent: {}'.format(FLAGS.agent_name))
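Both create_agent variants index the space payload as space[0] = low, space[1] = high, space[2] = shape, and the discrete agents take num_actions from space[2][0]. The environment-side scripts below produce this payload via action_space_info(); a hypothetical body, with placeholder values (the bounds and dimensionality here are assumptions), might look like:

def action_space_info():
    # Payload layout expected by create_agent: [low, high, shape].
    low = -1.0    # per-dimension lower bound of the Box space (assumed)
    high = 1.0    # per-dimension upper bound (assumed)
    shape = [7]   # action dimensionality; shape[0] doubles as num_actions
    return [low, high, shape]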
Example #4
    def _initialize_episode(self):
        """Initialization for a new episode.

        Returns:
          A list with the initial action chosen by the agent for each channel.
        """
        self.length_rnd = np.random.choice(self.length)
        self.theta_rnd = np.random.choice(self.theta)

        if self.reward_pipe[0] is None:
            # Send each environment its channel number, then the goal.
            self.channel_pipe = open_write_pipe(channel_name)
            write_to_pipe([self.channel_pipe] * n, list(range(n)))
            self.complete_pipe = open_write_pipe("/tmp/complete.pipe")
            write_to_pipe(self.complete_pipe, 1)
            self.goal_pipe = open_write_pipe(goal_path)
            write_to_pipe(
                self.goal_pipe,
                [round(self.length_rnd, 4),
                 round(self.theta_rnd, 4)])

            self.action_pipe = open_write_pipe(action_name_list)
            self.reset_pipe = open_write_pipe(reset_name_list)

            self.obs_pipe = open_read_pipe(obs_name_list)
            self.touch_pipe = open_read_pipe(touch_name_list)
            self.reward_pipe = open_read_pipe(reward_name_list)
            self.over_pipe = open_read_pipe(over_name_list)
            self.terminal_pipe = open_read_pipe(terminal_name_list)

        # Reading the image observations is disabled; zero placeholders are
        # used instead:
        # initial_observation = [read_from_pipe(obs_pipe)
        #                        for obs_pipe in self.obs_pipe]
        # initial_observation = [np.asarray(obs) for obs in initial_observation]
        initial_observation = [np.zeros([100, 100, 3])] * n  # one per channel
        initial_state = [
            np.array(read_from_pipe(state_pipe))
            for state_pipe in self.touch_pipe
        ]

        return [
            self._agent.begin_episode(observation, state)
            for observation, state in zip(initial_observation, initial_state)
        ]
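Example #5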
terminal_name = "/tmp/term_out"
reset_name = "/tmp/reset_in"
read_name = [action_name, reset_name]
write_name = [obs_name, touch_name, reward_name, over_name, terminal_name]
output("ready to make pipe")
make_pipe(channel_name)
make_pipe(space_name)
make_pipe(goal_name)
make_pipe('/tmp/complete.pipe')
output("make pipe channel, space, goal")

space_pipe = open_write_pipe(space_name)
output("open read pipe space")
channel_pipe = open_read_pipe(channel_name)
output("open read pipe channel")
channel = read_from_pipe(channel_pipe, 1)
output("read from pipe channel: {}".format(channel))

print(action_num())
if channel == 0:
    complete_pipe = open_read_pipe("/tmp/complete.pipe")
    complete = read_from_pipe(complete_pipe, 1)
    if not complete:
        print("write space")
        write_to_pipe(space_pipe, action_space_info())
    close_pipe(complete_pipe)
print("I AM CHANNEL %s" % channel)
# Evaluation use:
goal_pipe = open_read_pipe(goal_name)
goal_info = read_from_pipe(goal_pipe)
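The goal payload written in Example #4 is a two-element list [length, theta], each rounded to 4 decimal places, so goal_info presumably unpacks as (variable names here are assumptions):

length_goal, theta_goal = goal_info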
Example #6
    def _run_one_episode(self):
        """Executes a full trajectory of the agent interacting with the environment.

        Returns:
          The number of steps taken and the total reward.
        """
        step_number = 0
        total_reward = 0.

        action = self._initialize_episode()
        is_terminal = False
        n_list = list(range(n))

        # Keep interacting until every channel reaches a terminal state.
        while True:
            step_number += 1
            # Iterate over a copy so finished channels can be removed safely.
            for i in list(n_list):

                observation, state, reward, is_terminal = self._run_one_step(
                    i, action[i].tolist())

                total_reward += reward

                # Reward clipping is disabled:
                # reward = np.clip(reward, -1, 1)
                gameover = read_from_pipe(self.over_pipe[i])
                if gameover or step_number == self._max_steps_per_episode:
                    write_to_pipe(self.reset_pipe[i], True)

                    close_pipe([
                        self.action_pipe[i], self.reset_pipe[i],
                        self.obs_pipe[i], self.touch_pipe[i],
                        self.reward_pipe[i], self.over_pipe[i],
                        self.terminal_pipe[i]
                    ])
                    self.reward_pipe[i] = None
                    print("GAME_OVER")
                    self._end_episode(reward)

                    n_list.remove(i)
                    time.sleep(0.032)

                    continue
                # Note: this branch appears never to be taken in practice.
                elif is_terminal:
                    print("TERMINAL")

                    self._agent.end_episode(reward)
                    action[i] = self._agent.begin_episode(observation, state)
                else:
                    write_to_pipe(self.reset_pipe[i], gameover)
                    action[i] = self._agent.step(reward, observation, state)

            if not n_list:
                close_pipe(self.channel_pipe)
                close_pipe(self.complete_pipe)
                close_pipe(self.goal_pipe)
                break

        return step_number, total_reward
Example #7
            total_reward = 0
            is_terminal = False
            channel_pipe = open_write_pipe(channel_name)
            write_to_pipe(channel_pipe, 0)
            complete_pipe = open_write_pipe("/tmp/complete.pipe")
            write_to_pipe(complete_pipe, 1)

            write_to_pipe(goal_pipe, [round(length[l], 4), round(theta[t], 4)])

            action_pipe, reset_pipe = open_write_pipe(write_name_list)
            obs_pipe, touch_pipe, reward_pipe, over_pipe, terminal_pipe = open_read_pipe(
                read_name_list)
            """initial_observation_list = read_from_pipe(obs_pipe)"""
            """initial_observation = np.asarray(initial_observation_list)"""
            initial_observation = np.zeros([100, 100, 3])
            initial_state_list = read_from_pipe(touch_pipe)
            initial_state = np.asarray(initial_state_list)
            action = agent.begin_episode(initial_observation, initial_state)
            time.sleep(0.032)
            print('episodes %d' % j)
            episode_distance = []
            cnt1 = 0
            cnt2 = 0
            step_cnt = 0
            while True:
                action = action.tolist()
                write_to_pipe(action_pipe, action)

                state = read_from_pipe(touch_pipe)
                state = np.asarray(state)
                reward = read_from_pipe(reward_pipe)
Example #8
obs_name = "/tmp/obs_out"
touch_name = "/tmp/touch_out"
reward_name = "/tmp/reward_out"
over_name = "/tmp/over_out"
terminal_name = "/tmp/term_out"
reset_name = "/tmp/reset_in"
read_name = [action_name, reset_name]
write_name = [obs_name, touch_name, reward_name, over_name, terminal_name]

make_pipe(channel_name)
make_pipe(space_name)
make_pipe('/tmp/complete.pipe')

space_pipe = open_write_pipe(space_name)
channel_pipe = open_read_pipe(channel_name)
channel = read_from_pipe(channel_pipe, 1)

print(action_num())
if channel == 0:
    complete_pipe = open_read_pipe("/tmp/complete.pipe")
    complete = read_from_pipe(complete_pipe, 1)
    if not complete:
        print("write space")
        write_to_pipe(space_pipe, action_space_info())
    close_pipe(complete_pipe)
print("I AM CHANNEL %s" % channel)

# Build per-channel pipe names: base name plus channel suffix.
read_name_list = [(i + "%s.pipe" % channel) for i in read_name]
write_name_list = [(i + "%s.pipe" % channel) for i in write_name]
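The agent side (Example #4) opens the matching per-channel lists (action_name_list, obs_name_list, and so on). Those lists are not shown, but they presumably mirror the suffixing above; a sketch, where n and the action_name base path are assumptions and the other base names are copied from this example:

n = 1  # number of environment channels; the real value is an assumption

action_name = "/tmp/action_in"  # assumed; not defined in the snippets
reset_name = "/tmp/reset_in"
obs_name = "/tmp/obs_out"
touch_name = "/tmp/touch_out"
reward_name = "/tmp/reward_out"
over_name = "/tmp/over_out"
terminal_name = "/tmp/term_out"

# One pipe name per channel for every stream, mirroring "%s.pipe" % channel.
action_name_list = ["%s%d.pipe" % (action_name, i) for i in range(n)]
reset_name_list = ["%s%d.pipe" % (reset_name, i) for i in range(n)]
obs_name_list = ["%s%d.pipe" % (obs_name, i) for i in range(n)]
touch_name_list = ["%s%d.pipe" % (touch_name, i) for i in range(n)]
reward_name_list = ["%s%d.pipe" % (reward_name, i) for i in range(n)]
over_name_list = ["%s%d.pipe" % (over_name, i) for i in range(n)]
terminal_name_list = ["%s%d.pipe" % (terminal_name, i) for i in range(n)]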