def __init__(self, ob_space, ac_space, use_self_fed_heads=True,
             use_lstm=False, hs_len=None):
    """Assemble the field names, specs and templates for inference data.

    The field list always starts with 'X' (derived from ob_space). 'A'
    (derived from ac_space) is added only when use_self_fed_heads is False,
    and 'S'/'M' are added only when use_lstm is True. The three parallel
    lists are then handed to the parent constructor.

    Args:
        ob_space: observation space, passed to map_gym_space_to_structure
            and template_structure_from_gym_space.
        ac_space: action space, used only when use_self_fed_heads is False.
        use_self_fed_heads: when False, the action field 'A' is included.
        use_lstm: when True, the fields 'S' and 'M' are included.
        hs_len: length of the 'S' vector; must be integral when use_lstm is
            True.

    Raises:
        AssertionError: if use_lstm is True and hs_len is not integral.
        TypeError: if use_lstm is True and hs_len is None (int(None) fails).
    """
    shape_dtype = lambda x: (x.shape, x.dtype)

    _fields = ['X']
    specs = [map_gym_space_to_structure(shape_dtype, ob_space)]
    templates = [template_structure_from_gym_space(ob_space)]

    if not use_self_fed_heads:
        _fields.append('A')
        specs.append(map_gym_space_to_structure(shape_dtype, ac_space))
        templates.append(template_structure_from_gym_space(ac_space))

    if use_lstm:
        assert int(hs_len) == hs_len
        # 'S' and 'M' are presumably the recurrent state and the episode
        # mask -- confirm against the consumers of this data structure.
        _fields.extend(['S', 'M'])
        specs.extend([
            ([hs_len], np.float32),
            # np.bool_ instead of the np.bool alias, which was deprecated in
            # NumPy 1.20 and removed in 1.24.
            ([], np.bool_),
        ])
        templates.extend([None, None])

    super(InfData, self).__init__(_fields, specs, templates)
def __init__(self, ob_space, ac_space, n_v, use_lstm=False, hs_len=None,
             distillation=False, use_oppo_data=False, random_policy=True):
    """Assemble the field names, specs and templates for policy-gradient data.

    'X' and 'A' are always present. Depending on the flags, the following
    fields are appended in this order: 'neglogp' (random_policy), 'S' and
    'M' (use_lstm), 'flatparam' (distillation), 'OPPO_X' (use_oppo_data) and
    'OPPO_S' (use_oppo_data together with use_lstm). The resulting specs and
    templates are stored on the instance and forwarded to the parent
    constructor.

    Args:
        ob_space: observation space used for 'X' and 'OPPO_X'.
        ac_space: action space used for 'A', 'neglogp' and 'flatparam'.
        n_v: accepted for interface compatibility; not used by this
            constructor.
        use_lstm: when True, include 'S' and 'M' (and 'OPPO_S' if
            use_oppo_data is also True).
        hs_len: length of the 'S' / 'OPPO_S' vectors; must be integral when
            use_lstm is True.
        distillation: when True, include 'flatparam', shaped by
            make_pdtype(...).param_shape() for each action sub-space.
        use_oppo_data: when True, include the opponent fields.
        random_policy: when True, include a scalar float32 'neglogp' per
            action sub-space.

    Raises:
        AssertionError: if use_lstm is True and hs_len is not integral.
        TypeError: if use_lstm is True and hs_len is None (int(None) fails).
    """
    _fields = ['X', 'A']
    shape_dtype = lambda x: (x.shape, x.dtype)
    specs = [
        map_gym_space_to_structure(shape_dtype, ob_space),
        map_gym_space_to_structure(shape_dtype, ac_space),
    ]
    templates = [
        template_structure_from_gym_space(ob_space),
        template_structure_from_gym_space(ac_space),
    ]

    if random_policy:
        _fields.append('neglogp')
        specs.append(
            map_gym_space_to_structure(lambda x: ([], np.float32), ac_space))
        templates.append(template_structure_from_gym_space(ac_space))

    if use_lstm:
        assert int(hs_len) == hs_len
        _fields.extend(['S', 'M'])
        specs.extend([
            ([hs_len], np.float32),
            # np.bool_ instead of the np.bool alias, which was deprecated in
            # NumPy 1.20 and removed in 1.24.
            ([], np.bool_),
        ])
        templates.extend([None, None])

    if distillation:
        _fields.append('flatparam')
        logit_shape_dtype = lambda x: (make_pdtype(x).param_shape(),
                                       np.float32)
        param_shape_dtype = map_gym_space_to_structure(logit_shape_dtype,
                                                       ac_space)
        param_templates = template_structure_from_gym_space(ac_space)
        specs.append(param_shape_dtype)
        templates.append(param_templates)

    if use_oppo_data:
        _fields.append('OPPO_X')
        specs.append(map_gym_space_to_structure(shape_dtype, ob_space))
        templates.append(template_structure_from_gym_space(ob_space))
        if use_lstm:
            _fields.append('OPPO_S')  # oppo's mask is the same as self
            specs.append(([hs_len], np.float32))
            templates.append(None)

    self.specs = specs
    self.templates = templates
    super(PGData, self).__init__(_fields, specs, templates)
def gym_ddpg_actor_test():
    """Smoke test: build the net on Box spaces and print one sampled action.

    Builds the network with an LSTM configuration for an 11-dim Box
    observation space and a 2-dim Box action space, feeds a single sampled
    observation together with zeroed 'S' and 'M' inputs, and prints the
    `sam` output of every self-fed head.
    """
    from tensorflow.contrib.framework import nest
    import tpolicies.tp_utils as tp_utils

    mycfg = {
        'test': False,
        'use_loss_type': 'none',
        'use_value_head': False,
        'n_v': 4,
        'use_lstm': True,
        'batch_size': 1,
        'rollout_len': 1,
        'nlstm': 64,
        'hs_len': 64 * 2,
        'lstm_layer_norm': True,
        'weight_decay': 0.0005
    }
    ob_space = spaces.Box(shape=(11, ), dtype=np.float32, low=0, high=1)
    ac_space = spaces.Box(shape=(2, ), low=-1.0, high=1.0, dtype=np.float32)

    nc = net_config_cls(ob_space, ac_space, **mycfg)
    inputs = net_inputs_placeholders_fun(nc)
    out = net_build_fun(inputs, nc, scope='gym_ddpg')
    sample = ob_space.sample()

    # The context manager closes the session (and frees its resources) even
    # if initialization or the run below raises.
    with tf.Session() as sess:
        tf.global_variables_initializer().run(session=sess)

        # Batch of one observation plus zeroed recurrent inputs.
        feed_dict = {inputs.X: [sample]}
        feed_dict[inputs.S] = np.zeros(shape=[1, nc.hs_len])
        feed_dict[inputs.M] = np.zeros(shape=[1])

        ac_structure = tp_utils.template_structure_from_gym_space(ac_space)
        a = nest.map_structure_up_to(ac_structure, lambda head: head.sam,
                                     out.self_fed_heads)
        sam = sess.run(a, feed_dict=feed_dict)

    print(sam)
def __init__(self, ob_space, ac_space, n_v, use_lstm=False, hs_len=None,
             distillation=False, version='v1', use_oppo_data=False):
    """Assemble the field names, specs and templates for policy-gradient data.

    'X', 'A' and 'neglogp' are always present. Depending on the flags, the
    following fields are appended in this order: 'S' and 'M' (use_lstm),
    'logits' (distillation), 'OPPO_X' (use_oppo_data) and 'OPPO_S'
    (use_oppo_data together with use_lstm). The resulting specs and
    templates are stored on the instance and forwarded to the parent
    constructor.

    Args:
        ob_space: observation space used for 'X' and 'OPPO_X'.
        ac_space: action space used for 'A', 'neglogp' and 'logits'. With
            version 'v1' it must expose a `.spaces` sequence.
        n_v: accepted for interface compatibility; not used by this
            constructor.
        use_lstm: when True, include 'S' and 'M' (and 'OPPO_S' if
            use_oppo_data is also True).
        hs_len: length of the 'S' / 'OPPO_S' vectors; must be integral when
            use_lstm is True.
        distillation: when True, include the 'logits' field.
        version: 'v1' stores neglogp/logits as one long vector; 'v2' stores
            them with the same structure as ac_space.
        use_oppo_data: when True, include the opponent fields.

    Raises:
        KeyError: if version is neither 'v1' nor 'v2'.
        AssertionError: if use_lstm is True and hs_len is not integral.
        TypeError: if use_lstm is True and hs_len is None (int(None) fails).
    """
    _fields = ['X', 'A', 'neglogp']
    shape_dtype = lambda x: (x.shape, x.dtype)
    logit_shape_dtype = lambda x: (make_pdtype(x).param_shape(), np.float32)

    if version == 'v1':
        # neglogp/logits is one long vector
        neglogp_shape_dtype = ([len(ac_space.spaces)], np.float32)
        neglogp_templates = None
        # NOTE(review): logit_shape_dtype already returns a (shape, dtype)
        # pair, so this spec is a pair nested inside another pair -- confirm
        # that this is what the consumers of the 'v1' layout expect.
        logits_shape_dtype = (logit_shape_dtype(ac_space), np.float32)
        logits_templates = None
    elif version == 'v2':
        # neglogp/logits is structure same as ac_space
        neglogp_shape_dtype = map_gym_space_to_structure(
            lambda x: ([], np.float32), ac_space)
        neglogp_templates = template_structure_from_gym_space(ac_space)
        logits_shape_dtype = map_gym_space_to_structure(logit_shape_dtype,
                                                        ac_space)
        logits_templates = template_structure_from_gym_space(ac_space)
    else:
        raise KeyError('version not support!')

    specs = [
        map_gym_space_to_structure(shape_dtype, ob_space),
        map_gym_space_to_structure(shape_dtype, ac_space),
        neglogp_shape_dtype,
    ]
    templates = [
        template_structure_from_gym_space(ob_space),
        template_structure_from_gym_space(ac_space),
        neglogp_templates,
    ]

    if use_lstm:
        assert int(hs_len) == hs_len
        _fields.extend(['S', 'M'])
        specs.extend([
            ([hs_len], np.float32),
            # np.bool_ instead of the np.bool alias, which was deprecated in
            # NumPy 1.20 and removed in 1.24.
            ([], np.bool_),
        ])
        templates.extend([None, None])

    if distillation:
        _fields.append('logits')
        specs.append(logits_shape_dtype)
        templates.append(logits_templates)

    if use_oppo_data:
        _fields.append('OPPO_X')
        specs.append(map_gym_space_to_structure(shape_dtype, ob_space))
        templates.append(template_structure_from_gym_space(ob_space))
        if use_lstm:
            _fields.append('OPPO_S')  # oppo's mask is the same as self
            specs.append(([hs_len], np.float32))
            templates.append(None)

    self.specs = specs
    self.templates = templates
    super(PGData, self).__init__(_fields, specs, templates)
def conv_lstm_actor_test():
    """Smoke test: build the net on nested Tuple spaces and print one sample.

    Builds the network with an LSTM configuration for an observation space
    made of two identical (image Box, int Box, bool Box) tuples and an
    action space of two Discrete(6) heads, feeds one sampled observation
    together with zeroed 'S' and 'M' inputs, and prints the `sam` output of
    every self-fed head.
    """
    from tensorflow.contrib.framework import nest
    import tpolicies.tp_utils as tp_utils

    mycfg = {
        'test': False,
        'use_loss_type': 'none',
        'use_value_head': False,
        'n_v': 4,
        'sync_statistics': None,
        'use_lstm': True,
        'batch_size': 1,
        'rollout_len': 1,
        'nlstm': 64,
        'hs_len': 64 * 2,
        'lstm_layer_norm': True,
        'weight_decay': 0.0005
    }
    ob_space = spaces.Tuple([
        spaces.Tuple([
            spaces.Box(shape=(11, 11, 22), dtype=np.float32, low=0, high=1),
            spaces.Box(shape=(2, ), dtype=np.int32, low=0, high=10),
            # np.bool_ instead of the np.bool alias, which was deprecated in
            # NumPy 1.20 and removed in 1.24.
            spaces.Box(shape=[6], dtype=np.bool_, low=0, high=1)
        ])
    ] * 2)
    ac_space = spaces.Tuple([spaces.Discrete(n=6)] * 2)

    nc = net_config_cls(ob_space, ac_space, **mycfg)
    inputs = net_inputs_placeholders_fun(nc)
    out = net_build_fun(inputs, nc, scope='conv_lstm')
    sample = ob_space.sample()

    # The context manager closes the session (and frees its resources) even
    # if initialization or the run below raises.
    with tf.Session() as sess:
        tf.global_variables_initializer().run(session=sess)

        # Pair every sampled leaf with its placeholder, as a batch of one.
        feed_dict = {}
        for sample_group, ph_group in zip(sample, inputs.X):
            for x_np, x_ph in zip(sample_group, ph_group):
                feed_dict[x_ph] = [x_np]
        feed_dict[inputs.S] = np.zeros(shape=[1, nc.hs_len])
        feed_dict[inputs.M] = np.zeros(shape=[1])

        ac_structure = tp_utils.template_structure_from_gym_space(ac_space)
        a = nest.map_structure_up_to(ac_structure, lambda head: head.sam,
                                     out.self_fed_heads)
        sam = sess.run(a, feed_dict=feed_dict)

    print(sam)
def __init__(self, policy, ob_space, ac_space, n_v=1, scope_name="model",
             policy_config=None, use_gpu_id=-1):
    """Build a single-sample inference network and its parameter-assign ops.

    Args:
        policy: object exposing `net_config_cls`, `net_build_fun` and
            `net_inputs_placeholders_fun`.
        ob_space: observation space handed to `policy.net_config_cls`.
        ac_space: action space handed to `policy.net_config_cls`.
        n_v: accepted for interface compatibility; not used by this
            constructor.
        scope_name: variable scope passed to `policy.net_build_fun`.
        policy_config: optional dict of extra net-config keyword arguments.
            It is copied, so the caller's dict is left untouched;
            'batch_size' is forced to 1 and 'test' to True.
        use_gpu_id: negative to build without an explicit device; otherwise
            the net is built under '/gpu:<use_gpu_id>'.

    Raises:
        AssertionError: if `policy` lacks one of the required attributes.
    """
    # check
    assert hasattr(policy, 'net_config_cls')
    assert hasattr(policy, 'net_build_fun')
    assert hasattr(policy, 'net_inputs_placeholders_fun')

    # bookkeeping
    self.ob_space = ob_space
    # Was `self.ob_space = ac_space`, which clobbered the observation space
    # and never defined `ac_space` on the instance.
    self.ac_space = ac_space
    self._ac_structure = tp_utils.template_structure_from_gym_space(
        ac_space)

    # build the net
    if use_gpu_id < 0:  # not using GPU
        self.sess = tf.Session()
    else:
        tf_config = tf.ConfigProto(allow_soft_placement=True,
                                   log_device_placement=True)
        tf_config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=tf_config)

    # Work on a copy so the overrides below do not leak into the caller's
    # dict.
    policy_config = {} if policy_config is None else dict(policy_config)
    policy_config['batch_size'] = 1
    policy_config['test'] = True
    self.nc = policy.net_config_cls(ob_space, ac_space, **policy_config)

    if use_gpu_id < 0:  # not using GPU
        self.inputs_ph = policy.net_inputs_placeholders_fun(self.nc)
        self.net_out = policy.net_build_fun(self.inputs_ph, self.nc,
                                            scope=scope_name)
    else:
        with tf.device('/gpu:{}'.format(use_gpu_id)):
            self.inputs_ph = policy.net_inputs_placeholders_fun(self.nc)
            self.net_out = policy.net_build_fun(self.inputs_ph, self.nc,
                                                scope=scope_name)

    # saving/loading ops: one placeholder and one assign op per variable
    self.params = self.net_out.vars.all_vars
    self.params_ph = [
        tf.placeholder(p.dtype, shape=p.get_shape()) for p in self.params
    ]
    self.params_assign_ops = [
        p.assign(np_p) for p, np_p in zip(self.params, self.params_ph)
    ]

    # initialize the net params
    tf.global_variables_initializer().run(session=self.sess)

    # numpy rnn state stuff (if any)
    if self.net_out.S is None:
        self._hs_len = None
        self._state = None
    else:
        self._hs_len = self.net_out.S.shape[1].value
        self._state = np.zeros(shape=(self._hs_len, ), dtype=np.float32)
def __init__(self, league_mgr_addr, model_pool_addrs, port, ds, batch_size,
             ob_space, ac_space, policy, outputs=['a'], policy_config={},
             gpu_id=0, compress=True, batch_worker_num=4,
             update_model_seconds=60, learner_id=None, log_seconds=60,
             model_key="", task_attr='model_key', **kwargs):
    """Set up the inference server: data server, session, net and fetches.

    Args:
        league_mgr_addr: address used to create LeagueMgrAPIs; only used
            when `model_key` is empty.
        model_pool_addrs: addresses used to create ModelPoolAPIs.
        port: port handed to InferDataServer.
        ds: data structure handed to InferDataServer.
        batch_size: inference batch size; also written into the net config
            as 'batch_size'.
        ob_space: observation space handed to `policy.net_config_cls`.
        ac_space: action space handed to `policy.net_config_cls`.
        policy: object exposing `net_config_cls` and `net_build_fun`.
        outputs: names passed to `self.setup_fetches`.
        policy_config: dict of extra net-config keyword arguments (None is
            treated as empty). It is copied before being modified.
        gpu_id: non-negative to make that GPU the only visible device;
            negative to run without GPU options.
        compress: forwarded to InferDataServer.
        batch_worker_num: forwarded to InferDataServer.
        update_model_seconds: stored as `_update_model_seconds`.
        learner_id: stored as `_learner_id`.
        log_seconds: stored as `_log_seconds`.
        model_key: if non-empty, the server serves this fixed model;
            otherwise the key is obtained by querying the league manager.
        task_attr: dotted attribute path, stored split on '.'.
        **kwargs: accepted and ignored by this constructor.

    Raises:
        AssertionError: if `policy` lacks one of the required attributes.
    """
    self._update_model_seconds = update_model_seconds
    self._log_seconds = log_seconds
    self._learner_id = learner_id
    self._task_attr = task_attr.split('.')

    if model_key:
        # If model_key is given, this indicates the infserver works
        # for a fixed model inference
        self._league_mgr_apis = None
        self.is_rl = False
        self.model_key = model_key
    else:
        # If model_key is absent, this indicates an infserver
        # that performs varying policy inference, and model_key will be
        # assigned by querying league_mgr
        self._league_mgr_apis = LeagueMgrAPIs(league_mgr_addr)
        self.is_rl = True
        self.model_key = None
    self.model = None
    self._model_pool_apis = ModelPoolAPIs(model_pool_addrs)

    assert hasattr(policy, 'net_config_cls')
    assert hasattr(policy, 'net_build_fun')

    # bookkeeping
    self.ob_space = ob_space
    # Was `self.ob_space = ac_space`, which clobbered the observation space
    # and never defined `ac_space` on the instance.
    self.ac_space = ac_space
    self.batch_size = batch_size
    self._ac_structure = tp_utils.template_structure_from_gym_space(
        ac_space)
    self.outputs = outputs

    # build the net
    # Copy before writing 'batch_size': mutating the argument would modify
    # the caller's dict and, worse, the shared `{}` default, leaking the
    # setting into every later instance built with the default.
    policy_config = {} if policy_config is None else dict(policy_config)
    policy_config['batch_size'] = batch_size

    use_gpu = (gpu_id >= 0)
    self.data_server = InferDataServer(
        port=port,
        batch_size=batch_size,
        ds=ds,
        batch_worker_num=batch_worker_num,
        use_gpu=use_gpu,
        compress=compress,
    )

    config = tf.ConfigProto(allow_soft_placement=True)
    if use_gpu:
        config.gpu_options.visible_device_list = str(gpu_id)
        config.gpu_options.allow_growth = True
        if policy_config.get('use_xla'):
            config.graph_options.optimizer_options.global_jit_level = (
                tf.OptimizerOptions.ON_1)
    self._sess = tf.Session(config=config)

    self.nc = policy.net_config_cls(ob_space, ac_space, **policy_config)
    self.net_out = policy.net_build_fun(self.data_server._batch_input,
                                        self.nc, scope='Inf_server')

    # saving/loading ops: one placeholder and one assign op per variable
    self.params = self.net_out.vars.all_vars
    self.params_ph = [
        tf.placeholder(p.dtype, shape=p.get_shape()) for p in self.params
    ]
    self.params_assign_ops = [
        p.assign(np_p) for p, np_p in zip(self.params, self.params_ph)
    ]

    # initialize the net params
    tf.global_variables_initializer().run(session=self._sess)

    self.setup_fetches(outputs)
    self.id_and_fetches = [self.data_server._batch_data_id, self.fetches]
    self._update_model()
def __init__(self, policy, ob_space, ac_space, n_v=1, scope_name="model",
             policy_config=None, use_gpu_id=-1, infserver_addr=None,
             compress=True):
    """Create an agent that infers locally or through a remote infserver.

    When `infserver_addr` is None a local TensorFlow net and session are
    built; otherwise an InfData structure and InfServerAPIs client are
    created and no local net is built.

    Args:
        policy: object exposing `net_config_cls`, `net_build_fun` and
            `net_inputs_placeholders_fun`.
        ob_space: observation space handed to `policy.net_config_cls`.
        ac_space: action space handed to `policy.net_config_cls`.
        n_v: number of reward channels.
        scope_name: variable scope passed to `policy.net_build_fun`.
        policy_config: optional dict of extra net-config keyword arguments.
            It is copied, so the caller's dict is left untouched;
            'batch_size' is forced to 1 and 'test' to True. Must contain
            'use_self_fed_heads' when `infserver_addr` is given.
        use_gpu_id: negative to build on '/cpu:0'; otherwise the net is
            built under '/gpu:<use_gpu_id>'. Only used for local inference.
        infserver_addr: address of the inference server, or None for local
            inference.
        compress: whether to send compressed data to the infserver.

    Raises:
        AssertionError: if `policy` lacks one of the required attributes.
        KeyError: if `infserver_addr` is given and `policy_config` has no
            'use_self_fed_heads' entry.
    """
    # check
    assert hasattr(policy, 'net_config_cls')
    assert hasattr(policy, 'net_build_fun')
    assert hasattr(policy, 'net_inputs_placeholders_fun')

    # bookkeeping
    self.ob_space = ob_space
    # Was `self.ob_space = ac_space`, which clobbered the observation space
    # and never defined `ac_space` on the instance.
    self.ac_space = ac_space
    self._ac_structure = tp_utils.template_structure_from_gym_space(ac_space)
    self.infserver_addr = infserver_addr
    self.compress = compress  # send compressed data to infserver
    self.n_v = n_v  # number of reward channels

    # Work on a copy so the overrides below do not leak into the caller's
    # dict.
    policy_config = {} if policy_config is None else dict(policy_config)
    policy_config['batch_size'] = 1
    policy_config['test'] = True
    self.nc = policy.net_config_cls(ob_space, ac_space, **policy_config)
    self.rnn = policy_config.get('use_lstm', False)

    # numpy rnn state stuff (if any)
    self._last_state = None
    if not self.rnn:
        self._hs_len = None
        self._state = None
    else:
        self._hs_len = self.nc.hs_len
        self._state = np.zeros(shape=(self._hs_len,), dtype=np.float32)

    if infserver_addr is None:
        # build the net
        if use_gpu_id < 0:  # not using GPU
            self.sess = tf.Session()
            device = '/cpu:0'
        else:
            device = '/gpu:{}'.format(use_gpu_id)
            tf_config = tf.ConfigProto(allow_soft_placement=True,
                                       log_device_placement=True)
            tf_config.gpu_options.allow_growth = True
            self.sess = tf.Session(config=tf_config)
        with tf.device(device):
            self.inputs_ph = policy.net_inputs_placeholders_fun(self.nc)
            self.net_out = policy.net_build_fun(self.inputs_ph, self.nc,
                                                scope=scope_name)

        # saving/loading ops: one placeholder and one assign op per variable
        self.params = self.net_out.vars.all_vars
        self.params_ph = [
            tf.placeholder(p.dtype, shape=p.get_shape())
            for p in self.params
        ]
        self.params_assign_ops = [
            p.assign(np_p) for p, np_p in zip(self.params, self.params_ph)
        ]

        # initialize the net params
        tf.global_variables_initializer().run(session=self.sess)
    else:
        ds = InfData(ob_space, ac_space,
                     policy_config['use_self_fed_heads'], self.rnn,
                     self._hs_len)
        self.apis = InfServerAPIs(infserver_addr, ds, compress)
        self.ds = ds