Example #1
  def _build_agent(self, id, agent_file):
    Logger.print2('Agent {:d}: {}'.format(id, agent_file))
    if (agent_file == 'none'):
      agent = None
    else:
      agent = AgentBuilder.build_agent(self, id, agent_file)
      assert agent is not None, 'Failed to build agent {:d} from: {}'.format(id, agent_file)

    return agent
Example #2
 def save_model(self, out_path):
     with self.sess.as_default(), self.graph.as_default():
         try:
             save_path = self.saver.save(self.sess,
                                         out_path,
                                         write_meta_graph=False,
                                         write_state=False)
             Logger.print2('Model saved to: ' + save_path)
         except Exception:
             Logger.print2("Failed to save model to: " + out_path)
     return
Example #3
    def _build_nets(self, json_data):
        assert self.ACTOR_NET_KEY in json_data
        assert self.CRITIC_NET_KEY in json_data

        actor_net_name = json_data[self.ACTOR_NET_KEY]
        critic_net_name = json_data[self.CRITIC_NET_KEY]
        actor_init_output_scale = json_data.get(self.ACTOR_INIT_OUTPUT_SCALE_KEY, 1)

        s_size = self.get_state_size()
        g_size = self.get_goal_size()
        a_size = self.get_action_size()

        # setup input tensors
        self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size], name="s")
        self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size], name="a")
        self.tar_val_tf = tf.placeholder(tf.float32,
                                         shape=[None],
                                         name="tar_val")
        self.adv_tf = tf.placeholder(tf.float32, shape=[None], name="adv")
        self.g_tf = tf.placeholder(
            tf.float32,
            shape=([None, g_size] if self.has_goal() else None),
            name="g")
        self.old_logp_tf = tf.placeholder(tf.float32,
                                          shape=[None],
                                          name="old_logp")
        self.exp_mask_tf = tf.placeholder(tf.float32,
                                          shape=[None],
                                          name="exp_mask")

        with tf.variable_scope('main'):
            with tf.variable_scope('actor'):
                self.a_mean_tf = self._build_net_actor(
                    actor_net_name, actor_init_output_scale)
            with tf.variable_scope('critic'):
                self.critic_tf = self._build_net_critic(critic_net_name)

        if (self.a_mean_tf is not None):
            Logger.print2('Built actor net: ' + actor_net_name)

        if (self.critic_tf is not None):
            Logger.print2('Built critic net: ' + critic_net_name)

        self.norm_a_std_tf = self.exp_params_curr.noise * tf.ones(a_size)
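        # draw Gaussian exploration noise in normalized action space; exp_mask zeroes
        # the noise for non-exploratory steps before it is added to the policy mean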
        norm_a_noise_tf = self.norm_a_std_tf * tf.random_normal(
            shape=tf.shape(self.a_mean_tf))
        norm_a_noise_tf *= tf.expand_dims(self.exp_mask_tf, axis=-1)
        self.sample_a_tf = self.a_mean_tf + norm_a_noise_tf * self.a_norm.std_tf
        self.sample_a_logp_tf = TFUtil.calc_logp_gaussian(
            x_tf=norm_a_noise_tf, mean_tf=None, std_tf=self.norm_a_std_tf)

        return
Example #4
def build_arg_parser(args):
    arg_parser = ArgParser()
    arg_parser.load_args(args)

    arg_file = arg_parser.parse_string('arg_file', '')
    if (arg_file != ''):
        path = pybullet_data_local.getDataPath() + "/args/" + arg_file
        succ = arg_parser.load_file(path)
        Logger.print2(arg_file)
        assert succ, Logger.print2('Failed to load args from: ' + arg_file)

    return arg_parser
Example #5
def main():
  # Command line arguments
  args = sys.argv[1:]
  arg_parser = ArgParser()
  arg_parser.load_args(args)

  num_workers = arg_parser.parse_int('num_workers', 1)
  assert (num_workers > 0)

  Logger.print2('Running with {:d} workers'.format(num_workers))
  cmd = 'mpiexec -n {:d} python DeepMimic_Optimizer.py '.format(num_workers)
  cmd += ' '.join(args)
  Logger.print2('cmd: ' + cmd)
  subprocess.call(cmd, shell=True)
  return
Example #6
    def update(self):
        new_count = MPIUtil.reduce_sum(self.new_count)
        new_sum = MPIUtil.reduce_sum(self.new_sum)
        new_sum_sq = MPIUtil.reduce_sum(self.new_sum_sq)

        new_total = self.count + new_count
        if (self.count // self.CHECK_SYNC_COUNT !=
                new_total // self.CHECK_SYNC_COUNT):
            assert self.check_synced(), Logger.print2(
                'Normalizer parameters desynchronized')

        if new_count > 0:
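            # fold the new samples into the running mean and mean-square, weighting
            # the old and new statistics by their respective sample counts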
            new_mean = self._process_group_data(new_sum / new_count, self.mean)
            new_mean_sq = self._process_group_data(new_sum_sq / new_count,
                                                   self.mean_sq)
            w_old = float(self.count) / new_total
            w_new = float(new_count) / new_total

            self.mean = w_old * self.mean + w_new * new_mean
            self.mean_sq = w_old * self.mean_sq + w_new * new_mean_sq
            self.count = new_total
            self.std = self.calc_std(self.mean, self.mean_sq)

            self.new_count = 0
            self.new_sum.fill(0)
            self.new_sum_sq.fill(0)

        return
Example #7
    def _build_nets(self, json_data):
        assert self.ACTOR_NET_KEY in json_data
        assert self.CRITIC_NET_KEY in json_data

        actor_net_name = json_data[self.ACTOR_NET_KEY]
        critic_net_name = json_data[self.CRITIC_NET_KEY]
        actor_init_output_scale = json_data.get(self.ACTOR_INIT_OUTPUT_SCALE_KEY, 1)

        s_size = self.get_state_size()
        g_size = self.get_goal_size()
        a_size = self.get_action_size()

        # setup input tensors
        self.s_tf = tf.placeholder(tf.float32, shape=[None, s_size],
                                   name="s")  # observations
        self.tar_val_tf = tf.placeholder(tf.float32,
                                         shape=[None],
                                         name="tar_val")  # target value s
        self.adv_tf = tf.placeholder(tf.float32, shape=[None],
                                     name="adv")  # advantage
        self.a_tf = tf.placeholder(tf.float32, shape=[None, a_size],
                                   name="a")  # target actions
        self.g_tf = tf.placeholder(
            tf.float32,
            shape=([None, g_size] if self.has_goal() else None),
            name="g")  # goals

        with tf.variable_scope('main'):
            with tf.variable_scope('actor'):
                self.actor_tf = self._build_net_actor(actor_net_name,
                                                      actor_init_output_scale)
            with tf.variable_scope('critic'):
                self.critic_tf = self._build_net_critic(critic_net_name)

        if (self.actor_tf is not None):
            Logger.print2('Built actor net: ' + actor_net_name)

        if (self.critic_tf is not None):
            Logger.print2('Built critic net: ' + critic_net_name)

        return
Example #8
 def _update_mode(self):
     if (self._mode == self.Mode.TRAIN):
         self._update_mode_train()
     elif (self._mode == self.Mode.TRAIN_END):
         self._update_mode_train_end()
     elif (self._mode == self.Mode.TEST):
         self._update_mode_test()
     else:
         assert False, Logger.print2("Unsupported RL agent mode" +
                                     str(self._mode))
     return
Example #9
  def store(self, path):
    start_idx = MathUtil.INVALID_IDX
    n = path.pathlength()

    if (n > 0):
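      # paths that fail check_vals() are skipped, leaving start_idx at INVALID_IDX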
      assert path.is_valid()

      if path.check_vals():
        if self.buffers is None:
          self._init_buffers(path)

        idx = self._request_idx(n + 1)
        self._store_path(path, idx)
        self._add_sample_buffers(idx)

        self.num_paths += 1
        self.total_count += n + 1
        start_idx = idx[0]
      else:
        Logger.print2('Invalid path data value detected')

    return start_idx
Example #10
    def __init__(self, world, id, json_data):
        self.world = world
        self.id = id
        self.logger = Logger()
        self._mode = self.Mode.TRAIN

        assert self._check_action_space(), \
            Logger.print2("Invalid action space, got {:s}".format(str(self.get_action_space())))

        self._enable_training = True
        self.path = Path()
        self.iter = int(0)
        self.start_time = time.time()
        self._update_counter = 0

        self.update_period = 1.0  # simulated time (seconds) before each training update
        self.iters_per_update = int(1)
        self.discount = 0.95
        self.mini_batch_size = int(32)
        self.replay_buffer_size = int(50000)
        self.init_samples = int(1000)
        self.normalizer_samples = np.inf
        self._local_mini_batch_size = self.mini_batch_size  # batch size for each worker when multiprocessing
        self._need_normalizer_update = True
        self._total_sample_count = 0

        self._output_dir = ""
        self._int_output_dir = ""
        self.output_iters = 100
        self.int_output_iters = 100

        self.train_return = 0.0
        self.test_episodes = int(0)
        self.test_episode_count = int(0)
        self.test_return = 0.0
        self.avg_test_return = 0.0

        self.exp_anneal_samples = 320000
        self.exp_params_beg = ExpParams()
        self.exp_params_end = ExpParams()
        self.exp_params_curr = ExpParams()

        self._load_params(json_data)
        self._build_replay_buffer(self.replay_buffer_size)
        self._build_normalizers()
        self._build_bounds()
        self.reset()

        return
Example #11
    def record(self, x):
        size = self.get_size()
        is_array = isinstance(x, np.ndarray)
        if not is_array:
            assert (size == 1)
            x = np.array([[x]])

        assert x.shape[-1] == size, \
            Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d}'.format(size, x.shape[-1]))
        x = np.reshape(x, [-1, size])

        self.new_count += x.shape[0]
        self.new_sum += np.sum(x, axis=0)
        self.new_sum_sq += np.sum(np.square(x), axis=0)
        return
Example #12
    def end_episode(self):
        if (self.path.pathlength() > 0):
            self._end_path()

            if (self._mode == self.Mode.TRAIN
                    or self._mode == self.Mode.TRAIN_END):
                if (self.enable_training and self.path.pathlength() > 0):
                    self._store_path(self.path)
            elif (self._mode == self.Mode.TEST):
                self._update_test_return(self.path)
            else:
                assert False, Logger.print2("Unsupported RL agent mode" +
                                            str(self._mode))

            self._update_mode()
        return
Example #13
    def update_flatgrad(self, flat_grad, grad_scale=1.0):
        if self.iter % self.CHECK_SYNC_ITERS == 0:
            assert self.check_synced(), Logger.print2(
                'Network parameters desynchronized')

        if grad_scale != 1.0:
            flat_grad *= grad_scale

        MPI.COMM_WORLD.Allreduce(flat_grad, self._global_flat_grad, op=MPI.SUM)
        self._global_flat_grad /= MPIUtil.get_num_procs()
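        # the summed gradient is divided by the worker count, so every process applies
        # the same globally averaged gradient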

        self._load_flat_grad(self._global_flat_grad)
        self.sess.run([self._update], self._grad_feed)
        self.iter += 1

        return
Example #14
    def set_mean_std(self, mean, std):
        size = self.get_size()
        is_array = isinstance(mean, np.ndarray) and isinstance(std, np.ndarray)

        if not is_array:
            assert (size == 1)
            mean = np.array([mean])
            std = np.array([std])

        assert len(mean) == size and len(std) == size, \
            Logger.print2('Normalizer shape mismatch, expecting size {:d}, but got {:d} and {:d}'.format(size, len(mean), len(std)))

        self.mean = mean
        self.std = std
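        # keep the cached second moment consistent with the newly assigned mean and std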
        self.mean_sq = self.calc_mean_sq(self.mean, self.std)
        return
Example #15
  def build_agents(self):
    num_agents = self.env.get_num_agents()
    print("num_agents=", num_agents)
    self.agents = []

    Logger.print2('')
    Logger.print2('Num Agents: {:d}'.format(num_agents))

    agent_files = self.arg_parser.parse_strings('agent_files')
    print("len(agent_files)=", len(agent_files))
    assert (len(agent_files) == num_agents or len(agent_files) == 0)

    model_files = self.arg_parser.parse_strings('model_files')
    assert (len(model_files) == num_agents or len(model_files) == 0)

    output_path = self.arg_parser.parse_string('output_path')
    int_output_path = self.arg_parser.parse_string('int_output_path')

    for i in range(num_agents):
      curr_file = agent_files[i]
      curr_agent = self._build_agent(i, curr_file)

      if curr_agent is not None:
        curr_agent.output_dir = output_path
        curr_agent.int_output_dir = int_output_path
        Logger.print2(str(curr_agent))

        if (len(model_files) > 0):
          curr_model_file = model_files[i]
          if curr_model_file != 'none':
            curr_agent.load_model(pybullet_data_local.getDataPath() + "/" + curr_model_file)

      self.agents.append(curr_agent)
      Logger.print2('')

    self.set_enable_training(self.enable_training)

    return
Example #16
from pybullet_utils_local.logger import Logger

logger = Logger()
logger.configure_output_file("e:/mylog.txt")
for i in range(10):
    logger.log_tabular("Iteration", 1)
Logger.print2("hello world")

logger.print_tabular()
logger.dump_tabular()
Example #17
def shutdown():
  global world

  Logger.print2('Shutting down...')
  world.shutdown()
  return
Example #18
    def __init__(self, renders=False, arg_file=''):

        self._arg_parser = ArgParser()
        Logger.print2(
            "===========================================================")
        succ = False
        if (arg_file != ''):
            path = pybullet_data_local.getDataPath() + "/args/" + arg_file
            succ = self._arg_parser.load_file(path)
            Logger.print2(arg_file)
        assert succ, Logger.print2('Failed to load args from: ' + arg_file)

        self._p = None
        self._time_step = 1. / 240.
        self._internal_env = None
        self._renders = renders
        self._discrete_actions = False
        self._arg_file = arg_file
        self._render_height = 200
        self._render_width = 320

        self.theta_threshold_radians = 12 * 2 * math.pi / 360
        self.x_threshold = 0.4  #2.4
        high = np.array([
            self.x_threshold * 2,
            np.finfo(np.float32).max, self.theta_threshold_radians * 2,
            np.finfo(np.float32).max
        ])

        ctrl_size = 43  #numDof
        root_size = 7  # root

        action_dim = ctrl_size - root_size
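        # the 7 root DoFs (3 position + 4 orientation quaternion) are not directly actuated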

        action_bound_min = np.array([
            -4.79999999999, -1.00000000000, -1.00000000000, -1.00000000000,
            -4.00000000000, -1.00000000000, -1.00000000000, -1.00000000000,
            -7.77999999999, -1.00000000000, -1.000000000, -1.000000000,
            -7.850000000, -6.280000000, -1.000000000, -1.000000000,
            -1.000000000, -12.56000000, -1.000000000, -1.000000000,
            -1.000000000, -4.710000000, -7.779999999, -1.000000000,
            -1.000000000, -1.000000000, -7.850000000, -6.280000000,
            -1.000000000, -1.000000000, -1.000000000, -8.460000000,
            -1.000000000, -1.000000000, -1.000000000, -4.710000000
        ])

        #print("len(action_bound_min)=",len(action_bound_min))
        action_bound_max = np.array([
            4.799999999, 1.000000000, 1.000000000, 1.000000000, 4.000000000,
            1.000000000, 1.000000000, 1.000000000, 8.779999999, 1.000000000,
            1.0000000, 1.0000000, 4.7100000, 6.2800000, 1.0000000, 1.0000000,
            1.0000000, 12.560000, 1.0000000, 1.0000000, 1.0000000, 7.8500000,
            8.7799999, 1.0000000, 1.0000000, 1.0000000, 4.7100000, 6.2800000,
            1.0000000, 1.0000000, 1.0000000, 10.100000, 1.0000000, 1.0000000,
            1.0000000, 7.8500000
        ])
        #print("len(action_bound_max)=",len(action_bound_max))

        self.action_space = spaces.Box(action_bound_min, action_bound_max)
        observation_min = np.array([0.0] + [-100.0] + [-4.0] * 105 +
                                   [-500.0] * 90)
        observation_max = np.array([1.0] + [100.0] + [4.0] * 105 +
                                   [500.0] * 90)
        state_size = 197
        self.observation_space = spaces.Box(observation_min,
                                            observation_max,
                                            dtype=np.float32)

        self.seed()

        self.viewer = None
        self._configure()
Example #19
    def _train(self):
        samples = self.replay_buffer.total_count
        self._total_sample_count = int(MPIUtil.reduce_sum(samples))
        end_training = False

        if (self.replay_buffer_initialized):
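            # training iterations run only once the replay buffer has been seeded with
            # init_samples; the else-branch below just reports sample progress until then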
            if (self._valid_train_step()):
                prev_iter = self.iter
                iters = self._get_iters_per_update()
                avg_train_return = MPIUtil.reduce_avg(self.train_return)

                for i in range(iters):
                    curr_iter = self.iter
                    wall_time = time.time() - self.start_time
                    wall_time /= 60 * 60  # store time in hours

                    has_goal = self.has_goal()
                    s_mean = np.mean(self.s_norm.mean)
                    s_std = np.mean(self.s_norm.std)
                    g_mean = np.mean(self.g_norm.mean) if has_goal else 0
                    g_std = np.mean(self.g_norm.std) if has_goal else 0

                    self.logger.log_tabular("Iteration", self.iter)
                    self.logger.log_tabular("Wall_Time", wall_time)
                    self.logger.log_tabular("Samples",
                                            self._total_sample_count)
                    self.logger.log_tabular("Train_Return", avg_train_return)
                    self.logger.log_tabular("Test_Return",
                                            self.avg_test_return)
                    self.logger.log_tabular("State_Mean", s_mean)
                    self.logger.log_tabular("State_Std", s_std)
                    self.logger.log_tabular("Goal_Mean", g_mean)
                    self.logger.log_tabular("Goal_Std", g_std)
                    self._log_exp_params()

                    self._update_iter(self.iter + 1)
                    self._train_step()

                    Logger.print2("Agent " + str(self.id))
                    self.logger.print_tabular()
                    Logger.print2("")

                    if (self._enable_output()
                            and curr_iter % self.int_output_iters == 0):
                        self.logger.dump_tabular()

                if (prev_iter // self.int_output_iters !=
                        self.iter // self.int_output_iters):
                    end_training = self.enable_testing()

        else:
            Logger.print2("Agent " + str(self.id))
            Logger.print2("Samples: " + str(self._total_sample_count))
            Logger.print2("")

            if (self._total_sample_count >= self.init_samples):
                self.replay_buffer_initialized = True
                end_training = self.enable_testing()

        if self._need_normalizer_update:
            self._update_normalizers()
            self._need_normalizer_update = self.normalizer_samples > self._total_sample_count

        if end_training:
            self._init_mode_train_end()

        return
Example #20
 def load_model(self, in_path):
     with self.sess.as_default(), self.graph.as_default():
         self.saver.restore(self.sess, in_path)
         self._load_normalizers()
         Logger.print2('Model loaded from: ' + in_path)
     return