Example #1
0
 def __init__(self,
              ob_space,
              ac_space,
              use_self_fed_heads=True,
              use_lstm=False,
              hs_len=None):
     """Assemble field names, specs and templates and pass them to the base.

     The field list always starts with 'X' (built from `ob_space`). 'A'
     (built from `ac_space`) is added only when `use_self_fed_heads` is
     False. 'S' and 'M' are added when `use_lstm` is True.

     Args:
       ob_space: gym space used to build the spec/template of field 'X'.
       ac_space: gym space used to build the spec/template of field 'A'.
       use_self_fed_heads: when False, field 'A' is included.
       use_lstm: when True, fields 'S' (shape [hs_len], float32) and 'M'
         (scalar bool) are included.
         NOTE(review): presumably rnn state and mask -- confirm.
       hs_len: integer-valued length of field 'S'; required when `use_lstm`
         is True.
     """
     shape_dtype = lambda x: (x.shape, x.dtype)
     _fields = ['X']
     specs = [map_gym_space_to_structure(shape_dtype, ob_space)]
     templates = [template_structure_from_gym_space(ob_space)]
     if not use_self_fed_heads:
         _fields.append('A')
         specs.append(map_gym_space_to_structure(shape_dtype, ac_space))
         templates.append(template_structure_from_gym_space(ac_space))
     if use_lstm:
         assert hs_len is not None and int(hs_len) == hs_len, (
             'hs_len must be an integer when use_lstm is True')
         _fields.extend(['S', 'M'])
         specs.extend([
             ([hs_len], np.float32),
             # np.bool_ rather than the alias np.bool, which NumPy
             # deprecated in 1.20 and removed in 1.24.
             ([], np.bool_),
         ])
         templates.extend([
             None,
             None,
         ])
     super(InfData, self).__init__(_fields, specs, templates)
Example #2
0
 def __init__(self,
              ob_space,
              ac_space,
              n_v,
              use_lstm=False,
              hs_len=None,
              distillation=False,
              use_oppo_data=False,
              random_policy=True):
     """Assemble field names, specs and templates and pass them to the base.

     Fields are appended in this order: 'X', 'A', then optionally
     'neglogp', 'S' and 'M', 'flatparam', 'OPPO_X', 'OPPO_S'.

     Args:
       ob_space: gym space used for the 'X' (and 'OPPO_X') spec/template.
       ac_space: gym space used for the 'A', 'neglogp' and 'flatparam'
         spec/template.
       n_v: accepted but not used in this constructor.
       use_lstm: when True, add 'S' (shape [hs_len], float32) and 'M'
         (scalar bool).
       hs_len: integer-valued length of 'S' / 'OPPO_S'; required when
         `use_lstm` is True.
       distillation: when True, add 'flatparam', whose per-leaf shape is
         `make_pdtype(leaf).param_shape()` with dtype float32.
       use_oppo_data: when True, add 'OPPO_X' (same spec as 'X') and, if
         `use_lstm` is also True, 'OPPO_S'.
       random_policy: when True, add 'neglogp' with a scalar float32 per
         leaf of `ac_space`.
     """
     _fields = ['X', 'A']
     shape_dtype = lambda x: (x.shape, x.dtype)
     specs = [
         map_gym_space_to_structure(shape_dtype, ob_space),
         map_gym_space_to_structure(shape_dtype, ac_space)
     ]
     templates = [
         template_structure_from_gym_space(ob_space),
         template_structure_from_gym_space(ac_space)
     ]
     if random_policy:
         _fields.append('neglogp')
         specs.append(
             map_gym_space_to_structure(lambda x: ([], np.float32),
                                        ac_space))
         templates.append(template_structure_from_gym_space(ac_space))
     if use_lstm:
         assert hs_len is not None and int(hs_len) == hs_len, (
             'hs_len must be an integer when use_lstm is True')
         _fields.extend(['S', 'M'])
         specs.extend([
             ([hs_len], np.float32),
             # np.bool_ rather than the alias np.bool, which NumPy
             # deprecated in 1.20 and removed in 1.24.
             ([], np.bool_),
         ])
         templates.extend([
             None,
             None,
         ])
     if distillation:
         _fields.append('flatparam')
         logit_shape_dtype = lambda x: (make_pdtype(x).param_shape(),
                                        np.float32)
         specs.append(map_gym_space_to_structure(logit_shape_dtype, ac_space))
         templates.append(template_structure_from_gym_space(ac_space))
     if use_oppo_data:
         _fields.append('OPPO_X')
         specs.append(map_gym_space_to_structure(shape_dtype, ob_space))
         templates.append(template_structure_from_gym_space(ob_space))
         if use_lstm:
             _fields.append('OPPO_S')  # oppo's mask is the same as self
             specs.append(([hs_len], np.float32))
             templates.append(None)
     # Kept on the instance before delegating to the base constructor.
     self.specs = specs
     self.templates = templates
     super(PGData, self).__init__(_fields, specs, templates)
Example #3
0
def gym_ddpg_actor_test():
    """Smoke test: build the 'gym_ddpg' net and print one sampled action.

    Builds the net for Box observation/action spaces with `use_lstm` on,
    feeds a single sampled observation together with all-zero 'S' and 'M'
    inputs, and prints the `sam` output of the self-fed heads.
    """
    from tensorflow.contrib.framework import nest
    import tpolicies.tp_utils as tp_utils

    mycfg = {
        'test': False,
        'use_loss_type': 'none',
        'use_value_head': False,
        'n_v': 4,
        'use_lstm': True,
        'batch_size': 1,
        'rollout_len': 1,
        'nlstm': 64,
        'hs_len': 64 * 2,
        'lstm_layer_norm': True,
        'weight_decay': 0.0005
    }

    ob_space = spaces.Box(shape=(11, ), dtype=np.float32, low=0, high=1)
    ac_space = spaces.Box(shape=(2, ), low=-1.0, high=1.0, dtype=np.float32)

    nc = net_config_cls(ob_space, ac_space, **mycfg)
    inputs = net_inputs_placeholders_fun(nc)
    out = net_build_fun(inputs, nc, scope='gym_ddpg')
    sample = ob_space.sample()

    # The session is used as a context manager so that the resources it
    # owns are released when the test finishes (it was never closed before).
    with tf.Session() as sess:
        tf.global_variables_initializer().run(session=sess)
        feed_dict = {
            inputs.X: [sample],  # batch of one observation
            inputs.S: np.zeros(shape=[1, nc.hs_len]),
            inputs.M: np.zeros(shape=[1]),
        }
        ac_structure = tp_utils.template_structure_from_gym_space(ac_space)
        a = nest.map_structure_up_to(ac_structure, lambda head: head.sam,
                                     out.self_fed_heads)
        sam = sess.run(a, feed_dict=feed_dict)
    print(sam)
Example #4
0
 def __init__(self, ob_space, ac_space, n_v, use_lstm=False, hs_len=None,
              distillation=False, version='v1', use_oppo_data=False):
   """Assemble field names, specs and templates and pass them to the base.

   Fields are appended in this order: 'X', 'A', 'neglogp', then optionally
   'S' and 'M', 'logits', 'OPPO_X', 'OPPO_S'.

   Args:
     ob_space: gym space used for the 'X' (and 'OPPO_X') spec/template.
     ac_space: gym space used for the 'A', 'neglogp' and 'logits'
       spec/template. With version 'v1' it must expose `.spaces`.
     n_v: accepted but not used in this constructor.
     use_lstm: when True, add 'S' (shape [hs_len], float32) and 'M'
       (scalar bool).
     hs_len: integer-valued length of 'S' / 'OPPO_S'; required when
       `use_lstm` is True.
     distillation: when True, add the 'logits' field.
     version: 'v1' stores neglogp/logits as one long vector (template None);
       'v2' gives them the same structure as `ac_space`.
     use_oppo_data: when True, add 'OPPO_X' and, if `use_lstm` is also True,
       'OPPO_S'.

   Raises:
     KeyError: if `version` is neither 'v1' nor 'v2'.
   """
   _fields = ['X', 'A', 'neglogp']
   shape_dtype = lambda x: (x.shape, x.dtype)
   logit_shape_dtype = lambda x: (make_pdtype(x).param_shape(), np.float32)
   if version == 'v1': # neglogp/logits is one long vector
     neglogp_shape_dtype = ([len(ac_space.spaces)], np.float32)
     neglogp_templates = None
     # NOTE(review): logit_shape_dtype already returns a (shape, dtype) pair,
     # so this nests it inside another pair -- confirm this is intended.
     logits_shape_dtype = (logit_shape_dtype(ac_space), np.float32)
     logits_templates = None
   elif version == 'v2': # neglogp/logits is structure same as ac_space
     neglogp_shape_dtype = map_gym_space_to_structure(
       lambda x: ([], np.float32), ac_space)
     neglogp_templates = template_structure_from_gym_space(ac_space)
     logits_shape_dtype = map_gym_space_to_structure(logit_shape_dtype,
                                                     ac_space)
     logits_templates = template_structure_from_gym_space(ac_space)
   else:
     raise KeyError('version not support!')
   specs = [map_gym_space_to_structure(shape_dtype, ob_space),
            map_gym_space_to_structure(shape_dtype, ac_space),
            neglogp_shape_dtype]
   templates = [template_structure_from_gym_space(ob_space),
                template_structure_from_gym_space(ac_space),
                neglogp_templates]
   if use_lstm:
     assert hs_len is not None and int(hs_len) == hs_len, (
       'hs_len must be an integer when use_lstm is True')
     _fields.extend(['S', 'M'])
     # np.bool_ rather than the alias np.bool, which NumPy deprecated in
     # 1.20 and removed in 1.24.
     specs.extend([([hs_len], np.float32),
                   ([], np.bool_), ])
     templates.extend([None, None, ])
   if distillation:
     _fields.append('logits')
     specs.append(logits_shape_dtype)
     templates.append(logits_templates)
   if use_oppo_data:
     _fields.append('OPPO_X')
     specs.append(map_gym_space_to_structure(shape_dtype, ob_space))
     templates.append(template_structure_from_gym_space(ob_space))
     if use_lstm:
       _fields.append('OPPO_S')  # oppo's mask is the same as self
       specs.append(([hs_len], np.float32))
       templates.append(None)
   # Kept on the instance before delegating to the base constructor.
   self.specs = specs
   self.templates = templates
   super(PGData, self).__init__(_fields, specs, templates)
def conv_lstm_actor_test():
    """Smoke test: build the 'conv_lstm' net and print one sampled action.

    Builds the net for a nested Tuple observation space and a Tuple of two
    Discrete(6) action spaces with `use_lstm` on, feeds one sampled
    observation plus all-zero 'S' and 'M' inputs, and prints the `sam`
    output of the self-fed heads.
    """
    from tensorflow.contrib.framework import nest
    import tpolicies.tp_utils as tp_utils

    mycfg = {
        'test': False,
        'use_loss_type': 'none',
        'use_value_head': False,
        'n_v': 4,
        'sync_statistics': None,
        'use_lstm': True,
        'batch_size': 1,
        'rollout_len': 1,
        'nlstm': 64,
        'hs_len': 64 * 2,
        'lstm_layer_norm': True,
        'weight_decay': 0.0005
    }

    ob_space = spaces.Tuple([
        spaces.Tuple([
            spaces.Box(shape=(11, 11, 22), dtype=np.float32, low=0, high=1),
            spaces.Box(shape=(2, ), dtype=np.int32, low=0, high=10),
            # np.bool_ rather than the alias np.bool, which NumPy
            # deprecated in 1.20 and removed in 1.24.
            spaces.Box(shape=[6], dtype=np.bool_, low=0, high=1)
        ])
    ] * 2)
    ac_space = spaces.Tuple([spaces.Discrete(n=6)] * 2)

    nc = net_config_cls(ob_space, ac_space, **mycfg)
    inputs = net_inputs_placeholders_fun(nc)
    out = net_build_fun(inputs, nc, scope='conv_lstm')
    sample = ob_space.sample()

    # The session is used as a context manager so that the resources it
    # owns are released when the test finishes (it was never closed before).
    with tf.Session() as sess:
        tf.global_variables_initializer().run(session=sess)
        feed_dict = {}
        # Pair every sampled leaf with its input in inputs.X, wrapping the
        # value in a list to form a batch of one.
        for sub_sample, sub_inputs in zip(sample, inputs.X):
            for x_np, x in zip(sub_sample, sub_inputs):
                feed_dict[x] = [x_np]
        feed_dict[inputs.S] = np.zeros(shape=[1, nc.hs_len])
        feed_dict[inputs.M] = np.zeros(shape=[1])
        ac_structure = tp_utils.template_structure_from_gym_space(ac_space)
        a = nest.map_structure_up_to(ac_structure, lambda head: head.sam,
                                     out.self_fed_heads)
        sam = sess.run(a, feed_dict=feed_dict)
    print(sam)
Example #6
0
    def __init__(self,
                 policy,
                 ob_space,
                 ac_space,
                 n_v=1,
                 scope_name="model",
                 policy_config=None,
                 use_gpu_id=-1):
        """Build the policy net in a fresh session and set up param assign ops.

        Args:
          policy: object exposing `net_config_cls`, `net_build_fun` and
            `net_inputs_placeholders_fun`.
          ob_space: observation space, stored and passed to the net config.
          ac_space: action space, stored and passed to the net config.
          n_v: accepted but not used in this constructor.
          scope_name: scope passed to `policy.net_build_fun`.
          policy_config: optional dict of keyword arguments for
            `policy.net_config_cls`. NOTE: the keys 'batch_size' (1) and
            'test' (True) are written into the passed dict.
          use_gpu_id: negative means no GPU; otherwise the net is built under
            device '/gpu:<use_gpu_id>'.
        """
        # check
        assert hasattr(policy, 'net_config_cls')
        assert hasattr(policy, 'net_build_fun')
        assert hasattr(policy, 'net_inputs_placeholders_fun')
        # bookkeeping
        self.ob_space = ob_space
        # Previously assigned to self.ob_space, which clobbered the
        # observation space and left self.ac_space undefined.
        self.ac_space = ac_space
        self._ac_structure = tp_utils.template_structure_from_gym_space(
            ac_space)

        use_gpu = use_gpu_id >= 0

        # build the net
        if use_gpu:
            tf_config = tf.ConfigProto(allow_soft_placement=True,
                                       log_device_placement=True)
            tf_config.gpu_options.allow_growth = True
            self.sess = tf.Session(config=tf_config)
        else:
            self.sess = tf.Session()

        policy_config = {} if policy_config is None else policy_config
        policy_config['batch_size'] = 1
        policy_config['test'] = True
        self.nc = policy.net_config_cls(ob_space, ac_space, **policy_config)

        def _build_net():
            # Create the input placeholders and the net itself.
            self.inputs_ph = policy.net_inputs_placeholders_fun(self.nc)
            self.net_out = policy.net_build_fun(self.inputs_ph,
                                                self.nc,
                                                scope=scope_name)

        if use_gpu:
            with tf.device('/gpu:{}'.format(use_gpu_id)):
                _build_net()
        else:
            _build_net()

        # saving/loading ops
        self.params = self.net_out.vars.all_vars
        self.params_ph = [
            tf.placeholder(p.dtype, shape=p.get_shape()) for p in self.params
        ]
        self.params_assign_ops = [
            p.assign(np_p) for p, np_p in zip(self.params, self.params_ph)
        ]

        # initialize the net params
        tf.global_variables_initializer().run(session=self.sess)

        # numpy rnn state stuff (if any)
        if self.net_out.S is None:
            self._hs_len = None
            self._state = None
        else:
            self._hs_len = self.net_out.S.shape[1].value
            self._state = np.zeros(shape=(self._hs_len, ), dtype=np.float32)
Example #7
0
 def __init__(self,
              league_mgr_addr,
              model_pool_addrs,
              port,
              ds,
              batch_size,
              ob_space,
              ac_space,
              policy,
              outputs=None,
              policy_config=None,
              gpu_id=0,
              compress=True,
              batch_worker_num=4,
              update_model_seconds=60,
              learner_id=None,
              log_seconds=60,
              model_key="",
              task_attr='model_key',
              **kwargs):
     """Set up the data server, build the net on it and load a model.

     Args:
       league_mgr_addr: address for `LeagueMgrAPIs`; only used when
         `model_key` is empty.
       model_pool_addrs: addresses passed to `ModelPoolAPIs`.
       port: port passed to `InferDataServer`.
       ds: data structure passed to `InferDataServer`.
       batch_size: batch size for the data server; also written into
         `policy_config['batch_size']`.
       ob_space: observation space, stored and passed to the net config.
       ac_space: action space, stored and passed to the net config.
       policy: object exposing `net_config_cls` and `net_build_fun`.
       outputs: names passed to `self.setup_fetches`; defaults to ['a'].
       policy_config: optional dict of keyword arguments for
         `policy.net_config_cls`. NOTE: 'batch_size' is written into a
         caller-supplied dict.
       gpu_id: `>= 0` enables GPU use and selects the visible device.
       compress: passed to `InferDataServer`.
       batch_worker_num: passed to `InferDataServer`.
       update_model_seconds: stored as `_update_model_seconds`.
       learner_id: stored as `_learner_id`.
       log_seconds: stored as `_log_seconds`.
       model_key: if non-empty, the server does inference for this fixed
         model; otherwise the key is assigned by querying the league manager.
       task_attr: dotted attribute path, stored split on '.'.
       **kwargs: accepted and not used in this constructor.
     """
     # None replaces the former mutable defaults (['a'] and {}), which were
     # shared between calls; policy_config in particular is mutated below.
     outputs = ['a'] if outputs is None else outputs
     policy_config = {} if policy_config is None else policy_config

     self._update_model_seconds = update_model_seconds
     self._log_seconds = log_seconds
     self._learner_id = learner_id
     self._task_attr = task_attr.split('.')
     if model_key:
         # If model_key is given, this indicates the infserver works
         # for a fixed model inference
         self._league_mgr_apis = None
         self.is_rl = False
         self.model_key = model_key
     else:
         # If model_key is absent, this indicates an infserver
         # that performs varying policy inference, and model_key will be
         # assigned by querying league_mgr
         self._league_mgr_apis = LeagueMgrAPIs(league_mgr_addr)
         self.is_rl = True
         self.model_key = None
     self.model = None
     self._model_pool_apis = ModelPoolAPIs(model_pool_addrs)
     assert hasattr(policy, 'net_config_cls')
     assert hasattr(policy, 'net_build_fun')
     # bookkeeping
     self.ob_space = ob_space
     # Previously assigned to self.ob_space, which clobbered the observation
     # space and left self.ac_space undefined.
     self.ac_space = ac_space
     self.batch_size = batch_size
     self._ac_structure = tp_utils.template_structure_from_gym_space(
         ac_space)
     self.outputs = outputs
     # build the net
     policy_config['batch_size'] = batch_size
     use_gpu = (gpu_id >= 0)
     self.data_server = InferDataServer(
         port=port,
         batch_size=batch_size,
         ds=ds,
         batch_worker_num=batch_worker_num,
         use_gpu=use_gpu,
         compress=compress,
     )
     config = tf.ConfigProto(allow_soft_placement=True)
     if use_gpu:
         config.gpu_options.visible_device_list = str(gpu_id)
         config.gpu_options.allow_growth = True
         if policy_config.get('use_xla'):
             config.graph_options.optimizer_options.global_jit_level = (
                 tf.OptimizerOptions.ON_1)
     self._sess = tf.Session(config=config)
     self.nc = policy.net_config_cls(ob_space, ac_space, **policy_config)
     # The net reads directly from the data server's batched input.
     self.net_out = policy.net_build_fun(self.data_server._batch_input,
                                         self.nc,
                                         scope='Inf_server')
     # saving/loading ops
     self.params = self.net_out.vars.all_vars
     self.params_ph = [
         tf.placeholder(p.dtype, shape=p.get_shape()) for p in self.params
     ]
     self.params_assign_ops = [
         p.assign(np_p) for p, np_p in zip(self.params, self.params_ph)
     ]
     # initialize the net params
     tf.global_variables_initializer().run(session=self._sess)
     self.setup_fetches(outputs)
     self.id_and_fetches = [self.data_server._batch_data_id, self.fetches]
     self._update_model()
Example #8
0
  def __init__(self, policy, ob_space, ac_space, n_v=1, scope_name="model",
               policy_config=None, use_gpu_id=-1, infserver_addr=None,
               compress=True):
    """Set up either a locally built net or a client to a remote infserver.

    Args:
      policy: object exposing `net_config_cls`, `net_build_fun` and
        `net_inputs_placeholders_fun`.
      ob_space: observation space, stored and passed to the net config.
      ac_space: action space, stored and passed to the net config.
      n_v: number of reward channels.
      scope_name: scope passed to `policy.net_build_fun` (local net only).
      policy_config: optional dict of keyword arguments for
        `policy.net_config_cls`. NOTE: the keys 'batch_size' (1) and 'test'
        (True) are written into the passed dict. It must contain
        'use_self_fed_heads' when `infserver_addr` is given.
      use_gpu_id: negative builds the local net under '/cpu:0'; otherwise
        under '/gpu:<use_gpu_id>'.
      infserver_addr: if None the net is built locally; otherwise an
        `InfServerAPIs` client for this address is created instead.
      compress: send compressed data to infserver.

    Raises:
      KeyError: if `infserver_addr` is given and `policy_config` lacks
        'use_self_fed_heads'.
    """
    # check
    assert hasattr(policy, 'net_config_cls')
    assert hasattr(policy, 'net_build_fun')
    assert hasattr(policy, 'net_inputs_placeholders_fun')

    # bookkeeping
    self.ob_space = ob_space
    # Previously assigned to self.ob_space, which clobbered the observation
    # space and left self.ac_space undefined.
    self.ac_space = ac_space
    self._ac_structure = tp_utils.template_structure_from_gym_space(ac_space)
    self.infserver_addr = infserver_addr
    self.compress = compress  # send compressed data to infserver
    self.n_v = n_v  # number of reward channels

    policy_config = {} if policy_config is None else policy_config
    policy_config['batch_size'] = 1
    policy_config['test'] = True
    self.nc = policy.net_config_cls(ob_space, ac_space, **policy_config)
    self.rnn = policy_config.get('use_lstm', False)
    # numpy rnn state stuff (if any)
    self._last_state = None
    if self.rnn:
      self._hs_len = self.nc.hs_len
      self._state = np.zeros(shape=(self._hs_len,), dtype=np.float32)
    else:
      self._hs_len = None
      self._state = None

    if infserver_addr is None:
      # build the net
      if use_gpu_id < 0:  # not using GPU
        self.sess = tf.Session()
        device = '/cpu:0'
      else:
        device = '/gpu:{}'.format(use_gpu_id)
        tf_config = tf.ConfigProto(allow_soft_placement=True,
                                   log_device_placement=True)
        tf_config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=tf_config)
      with tf.device(device):
        self.inputs_ph = policy.net_inputs_placeholders_fun(self.nc)
        self.net_out = policy.net_build_fun(self.inputs_ph, self.nc,
                                            scope=scope_name)
      # saving/loading ops
      self.params = self.net_out.vars.all_vars
      self.params_ph = [tf.placeholder(p.dtype, shape=p.get_shape())
                        for p in self.params]
      self.params_assign_ops = [
        p.assign(np_p) for p, np_p in zip(self.params, self.params_ph)
      ]

      # initialize the net params
      tf.global_variables_initializer().run(session=self.sess)
    else:
      # Remote inference: no local net, only the data structure describing
      # what is sent to the infserver and the client that sends it.
      ds = InfData(ob_space, ac_space, policy_config['use_self_fed_heads'],
                   self.rnn, self._hs_len)
      self.apis = InfServerAPIs(infserver_addr, ds, compress)
      self.ds = ds