예제 #1
0
    def _initialize_episode(self):
        """Initialization for a new episode.

        Samples a goal (``length_rnd``, ``theta_rnd``) from ``self.length`` and
        ``self.theta``. If the pipes have not been opened yet (detected by
        ``self.reward_pipe[0]`` being ``None``), it also writes the channel
        indices, the completion flag and the rounded goal to their pipes, and
        opens the action/reset write pipes and the
        obs/touch/reward/over/terminal read pipes.

        Returns:
          list: the values returned by
            ``self._agent.begin_episode(observation, state)`` for each
            (observation, state) pair.
        """
        self.length_rnd = np.random.choice(self.length)
        self.theta_rnd = np.random.choice(self.theta)

        # Identity test on purpose: `== None` dispatches to the element's
        # __eq__, which is not guaranteed to yield a plain bool.
        if self.reward_pipe[0] is None:
            # Hand the channel indices 0..n-1 to the peer.
            self.channel_pipe = open_write_pipe(channel_name)
            write_to_pipe([self.channel_pipe] * n, list(range(n)))

            self.complete_pipe = open_write_pipe("/tmp/complete.pipe")
            write_to_pipe(self.complete_pipe, 1)

            # Publish the sampled goal, rounded to four decimals.
            self.goal_pipe = open_write_pipe(goal_path)
            write_to_pipe(
                self.goal_pipe,
                [round(self.length_rnd, 4),
                 round(self.theta_rnd, 4)])

            # Pipes this side writes to.
            self.action_pipe = open_write_pipe(action_name_list)
            self.reset_pipe = open_write_pipe(reset_name_list)

            # Pipes this side reads from.
            self.obs_pipe = open_read_pipe(obs_name_list)
            self.touch_pipe = open_read_pipe(touch_name_list)
            self.reward_pipe = open_read_pipe(reward_name_list)
            self.over_pipe = open_read_pipe(over_name_list)
            self.terminal_pipe = open_read_pipe(terminal_name_list)

        # The observation pipes are not read here; a zero array of shape
        # (100, 100, 3) stands in for the initial observation.
        initial_observation = [np.zeros([100, 100, 3])]
        initial_state = [
            np.array(read_from_pipe(state_pipe))
            for state_pipe in self.touch_pipe
        ]

        # NOTE(review): zip() stops at the shorter input and there is a single
        # placeholder observation, so at most one action is returned even when
        # several touch pipes are open -- confirm this is intended.
        return [
            self._agent.begin_episode(observation, state)
            for observation, state in zip(initial_observation, initial_state)
        ]
예제 #2
0
def create_agent(sess, summary_writer=None):
    """Build the agent selected by the module-level ``agent_name``.

    Reads the action-space description from the pipe at ``space_path``,
    closes the channel and complete pipes, and constructs the matching agent.

    Args:
      sess: forwarded unchanged to the agent constructor.
      summary_writer: forwarded to the agent constructor; replaced by
        ``None`` when ``debug_mode`` is falsy.

    Returns:
      The constructed agent instance.

    Raises:
      ValueError: if ``agent_name`` is not one of 'ddpg', 'dqn', 'rainbow'
        or 'implicit_quantile'.
    """
    space_fd = open_read_pipe(space_path)
    space = read_from_pipe(space_fd)
    close_pipe([channel_pipe, complete_pipe])

    writer = summary_writer if debug_mode else None
    selected = agent_name

    # The descriptor is not needed once the space has been read; it is
    # released before any agent is constructed (and before the error path).
    os.close(space_fd)

    if selected == 'ddpg':
        # space is indexed as (low, high, shape) for the Box constructor.
        action_box = spaces.Box(space[0],
                                space[1],
                                shape=space[2],
                                dtype=np.float32)
        return ddpg_agent_s.DDPGAgent(sess,
                                      action_space=action_box,
                                      summary_writer=writer)

    if selected == 'dqn':
        return dqn_agent.DQNAgent(sess,
                                  num_actions=space,
                                  summary_writer=writer)

    if selected == 'rainbow':
        return rainbow_agent.RainbowAgent(sess,
                                          num_actions=space,
                                          summary_writer=writer)

    if selected == 'implicit_quantile':
        return implicit_quantile_agent.ImplicitQuantileAgent(
            sess, num_actions=space, summary_writer=writer)

    raise ValueError('Unknown agent: {}'.format(selected))
def create_agent(sess, environment, summary_writer=None):
    """Creates the RL agent selected by `FLAGS.agent_name`.

    The action-space description is read from the pipe at `space_path`
    rather than taken from `environment`. For 'ddpg' it is used as
    `(space[0], space[1], shape=space[2])` to build a `spaces.Box`; for the
    other agents `space[2][0]` is passed as the number of actions.

    Args:
      sess: A `tf.Session` object for running associated ops.
      environment: Unused by this function; kept so existing callers that
        pass it keep working.
      summary_writer: A Tensorflow summary writer to pass to the agent
        for in-agent training statistics in Tensorboard. Replaced by `None`
        unless `FLAGS.debug_mode` is set.

    Returns:
      agent: An RL agent.

    Raises:
      ValueError: If `agent_name` is not in supported list.
    """
    print("enter")
    s = open_read_pipe(space_path)
    try:
        space = read_from_pipe(s)
    finally:
        # Close exactly once, and also when the read fails, so the
        # descriptor is never leaked.
        os.close(s)
    print("read space pipe")

    if not FLAGS.debug_mode:
        summary_writer = None

    if FLAGS.agent_name == 'ddpg':
        return ddpg_agent_s.DDPGAgent(
            sess,
            action_space=spaces.Box(space[0],
                                    space[1],
                                    shape=space[2],
                                    dtype=np.float32),
            summary_writer=summary_writer)
    elif FLAGS.agent_name == 'dqn':
        return dqn_agent.DQNAgent(sess,
                                  num_actions=space[2][0],
                                  summary_writer=summary_writer)
    elif FLAGS.agent_name == 'rainbow':
        return rainbow_agent.RainbowAgent(sess,
                                          num_actions=space[2][0],
                                          summary_writer=summary_writer)
    elif FLAGS.agent_name == 'implicit_quantile':
        return implicit_quantile_agent.ImplicitQuantileAgent(
            sess, num_actions=space[2][0], summary_writer=summary_writer)
    else:
        raise ValueError('Unknown agent: {}'.format(FLAGS.agent_name))
# Base paths of the per-channel pipes. A channel suffix is presumably appended
# later to form the actual file names -- TODO confirm against the rest of the
# script.
reward_name = "/tmp/reward_out"
over_name = "/tmp/over_out"
terminal_name = "/tmp/term_out"
reset_name = "/tmp/reset_in"
# Grouped by the direction this process uses them in: the "*_in" names go in
# read_name, the "*_out" names in write_name.
read_name = [action_name, reset_name]
write_name = [obs_name, touch_name, reward_name, over_name, terminal_name]
output("ready to make pipe")
# Set up the four shared handshake pipes (channel, space, goal, complete).
make_pipe(channel_name)
make_pipe(space_name)
make_pipe(goal_name)
make_pipe('/tmp/complete.pipe')
output("make pipe channel, space, goal")

# NOTE(review): if these are FIFOs, each open blocks until the peer opens the
# other end, so the order of the statements below matters -- confirm before
# reordering anything.
space_pipe = open_write_pipe(space_name)
# NOTE(review): the message below says "read", but the space pipe was just
# opened for writing.
output("open read pipe space")
channel_pipe = open_read_pipe(channel_name)
output("open read pipe channel")
# The value read here is this process's channel; it is compared with 0 below.
channel = read_from_pipe(channel_pipe, 1)
output("read from pipe channel: {}".format(channel))

print(action_num())
# Only channel 0 takes part in the space exchange.
if channel == 0:
    complete_pipe = open_read_pipe("/tmp/complete.pipe")
    complete = read_from_pipe(complete_pipe, 1)
    # The action-space description is sent only when the flag read from the
    # complete pipe is falsy.
    if not complete:
        print("write space")
        write_to_pipe(space_pipe, action_space_info())
    # os.close(complete_pipe)
    close_pipe(complete_pipe)
print("I AM CHANNEL %s" % channel)
'''evaluation use'''
예제 #5
0
        # Run 100 episodes
        for j in range(100):
            l = int(j / 20)
            t = j % 20

            total_reward = 0
            is_terminal = False
            channel_pipe = open_write_pipe(channel_name)
            write_to_pipe(channel_pipe, 0)
            complete_pipe = open_write_pipe("/tmp/complete.pipe")
            write_to_pipe(complete_pipe, 1)

            write_to_pipe(goal_pipe, [round(length[l], 4), round(theta[t], 4)])

            action_pipe, reset_pipe = open_write_pipe(write_name_list)
            obs_pipe, touch_pipe, reward_pipe, over_pipe, terminal_pipe = open_read_pipe(
                read_name_list)
            """initial_observation_list = read_from_pipe(obs_pipe)"""
            """initial_observation = np.asarray(initial_observation_list)"""
            initial_observation = np.zeros([100, 100, 3])
            initial_state_list = read_from_pipe(touch_pipe)
            initial_state = np.asarray(initial_state_list)
            action = agent.begin_episode(initial_observation, initial_state)
            time.sleep(0.032)
            print('episodes %d' % j)
            episode_distance = []
            cnt1 = 0
            cnt2 = 0
            step_cnt = 0
            while 1:
                action = action.tolist()
                write_to_pipe(action_pipe, action)
예제 #6
0
# Base paths of the per-channel pipes; the channel suffix is appended at the
# bottom of this section.
action_name = "/tmp/action_in"
obs_name = "/tmp/obs_out"
touch_name = "/tmp/touch_out"
reward_name = "/tmp/reward_out"
over_name = "/tmp/over_out"
terminal_name = "/tmp/term_out"
reset_name = "/tmp/reset_in"
# Grouped by the direction this process uses them in: the "*_in" names go in
# read_name, the "*_out" names in write_name.
read_name = [action_name, reset_name]
write_name = [obs_name, touch_name, reward_name, over_name, terminal_name]

# Set up the three shared handshake pipes (channel, space, complete).
make_pipe(channel_name)
make_pipe(space_name)
make_pipe('/tmp/complete.pipe')

# NOTE(review): if these are FIFOs, each open blocks until the peer opens the
# other end, so the order of the statements below matters -- confirm before
# reordering anything.
space_pipe = open_write_pipe(space_name)
channel_pipe = open_read_pipe(channel_name)
# The value read here is this process's channel; it is compared with 0 below
# and used as the pipe-name suffix.
channel = read_from_pipe(channel_pipe, 1)

print(action_num())
# Only channel 0 takes part in the space exchange.
if channel == 0:
    complete_pipe = open_read_pipe("/tmp/complete.pipe")
    complete = read_from_pipe(complete_pipe, 1)
    # The action-space description is sent only when the flag read from the
    # complete pipe is falsy.
    if not complete:
        print("write space")
        write_to_pipe(space_pipe, action_space_info())
    # os.close(complete_pipe)
    close_pipe(complete_pipe)
print("I AM CHANNEL %s" % channel)

# Per-channel pipe paths: each base name in read_name gets "<channel>.pipe"
# appended, e.g. "/tmp/action_in" + "0.pipe" for channel 0.
read_name_list = [(i + "%s.pipe" % channel) for i in read_name]