def _init(self, ob_space, ac_space, hid_size, feat_size, gaussian_fixed_var=True):
    """Build value ('vf') and policy ('pol') networks whose neighbor block is
    summarized by a mean embedding (one MeanEmbedding per head).

    The flattened observation row is [neighbor block | local block]; the
    neighbor block is fed through me.MeanEmbedding, concatenated with the raw
    local features, and passed through len(hid_size) ReLU layers per head.

    Args:
        ob_space: gym.spaces.Box; also carries `dim_mean_embs` ([count, width]
            of the neighbor block) and `dim_flat_o` (width of the local block).
        ac_space: action space; a Box enables the fixed-variance Gaussian head.
        hid_size: sequence of hidden-layer widths (shared by both heads).
        feat_size: feature sizes forwarded to me.MeanEmbedding.
        gaussian_fixed_var: if True (and Box actions), the policy outputs a
            mean network plus one state-independent `logstd` variable.
    """
    num_hid_layers = len(hid_size)
    mean_emb = ob_space.dim_mean_embs
    nr_rec_obs = mean_emb[0]  # each agent receives n_agents - 1 observations...
    dim_rec_obs = mean_emb[1]  # ... each of size dim_rec_obs ...
    dim_flat_obs = ob_space.dim_flat_o  # ... plus a local observation
    assert isinstance(ob_space, gym.spaces.Box)
    self.pdtype = pdtype = make_pdtype(ac_space)
    # a row in ob contains an agent's flattened observation, the first dimension
    # needs to be None because we use it for training and inference,
    # i.e. [None, (n_agents - 1) * dim_rec_obs + dim_flat_obs]
    ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=(None,) + ob_space.shape)
    # grab only the part that goes into the mean embedding
    flat_obs_input_layer = tf.slice(ob, [0, 0], [-1, nr_rec_obs * dim_rec_obs])
    # grab only the local observation
    flat_feature_input_layer = tf.slice(ob, [0, nr_rec_obs * dim_rec_obs], [-1, dim_flat_obs])
    with tf.variable_scope('vf'):
        with tf.variable_scope('me'):
            me_v = me.MeanEmbedding(flat_obs_input_layer, feat_size, nr_rec_obs, dim_rec_obs)
        last_out = tf.concat([me_v.me_out, flat_feature_input_layer], axis=1)
        for i in range(num_hid_layers):
            last_out = tf.layers.dense(last_out, hid_size[i], name="fc%i" % (i + 1),
                                       kernel_initializer=U.normc_initializer(1.0))
            if self.layer_norm:
                last_out = tfc.layers.layer_norm(last_out)
            last_out = tf.nn.relu(last_out)
        # scalar state-value prediction, [:, 0] drops the trailing unit axis
        self.vpred = tf.layers.dense(last_out, 1, name='final',
                                     kernel_initializer=U.normc_initializer(1.0))[:, 0]
    with tf.variable_scope('pol'):
        with tf.variable_scope('me'):
            # separate embedding (own weights) for the policy head
            me_pi = me.MeanEmbedding(flat_obs_input_layer, feat_size, nr_rec_obs, dim_rec_obs)
        last_out = tf.concat([me_pi.me_out, flat_feature_input_layer], axis=1)
        for i in range(num_hid_layers):
            last_out = tf.layers.dense(last_out, hid_size[i], name="fc%i" % (i + 1),
                                       kernel_initializer=U.normc_initializer(1.0))
            if self.layer_norm:
                last_out = tfc.layers.layer_norm(last_out)
            last_out = tf.nn.relu(last_out)
        if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
            # state-dependent mean, state-independent (trainable) logstd
            mean = tf.layers.dense(last_out, pdtype.param_shape()[0]//2, name='final',
                                   kernel_initializer=U.normc_initializer(0.01))
            logstd = tf.get_variable(name="logstd", shape=[1, pdtype.param_shape()[0]//2],
                                     initializer=tf.zeros_initializer())
            # mean * 0.0 broadcasts logstd to the batch dimension
            pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
        else:
            pdparam = tf.layers.dense(last_out, pdtype.param_shape()[0], name='final',
                                      kernel_initializer=U.normc_initializer(0.01))
    self.pd = pdtype.pdfromflat(pdparam)
    # no recurrent state in this policy
    self.state_in = []
    self.state_out = []
    stochastic = tf.placeholder(dtype=tf.bool, shape=())
    # sample during training, act deterministically (mode) otherwise
    ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
    self._act = U.function([stochastic, ob], [ac, self.vpred])
    # expose the embedding outputs for inspection/debugging
    self._me_v = U.function([ob], [me_v.me_out])
    self._me_pi = U.function([ob], [me_pi.me_out])
def _init(self, ob_space, ac_space, hid_size, feat_size, gaussian_fixed_var=True):
    """Build value ('vf') and policy ('pol') networks for an observation laid
    out as [neighbor block | remaining flat block | local block].

    The first two blocks each get their own dense input projection (no mean
    embedding here); the projections are concatenated with the raw local
    features and fed through len(hid_size) ReLU layers per head.

    Args:
        ob_space: gym.spaces.Box; also carries `dim_rec_o` ([count, width] of
            the neighbor block), `dim_flat_o` and `dim_local_o` describing the
            flattened layout.
        ac_space: action space; a Box enables the fixed-variance Gaussian head.
        hid_size: sequence of hidden-layer widths (shared by both heads).
        feat_size: nested sequence; feat_size[0][0] / feat_size[1][0] are the
            widths of the two input projections.
        gaussian_fixed_var: if True (and Box actions), the policy outputs a
            mean network plus one state-independent `logstd` variable.
    """
    num_hid_layers = len(hid_size)
    neighbor_info = ob_space.dim_rec_o
    nr_rec_obs = neighbor_info[0]   # number of received neighbor observations
    dim_rec_obs = neighbor_info[1]  # width of each neighbor observation
    # width of the flat block that is *not* the local observation
    rest = ob_space.dim_flat_o - ob_space.dim_local_o
    # NOTE: removed dead local `dim_flat_obs = ob_space.dim_flat_o`
    # (it was assigned but never read; the slices below use dim_local_o/rest)
    assert isinstance(ob_space, gym.spaces.Box)
    self.pdtype = pdtype = make_pdtype(ac_space)
    # batch dimension is None: same placeholder for training and inference
    ob = U.get_placeholder(name="ob", dtype=tf.float32,
                           shape=(None, ) + ob_space.shape)
    # split the flattened row into its three consecutive blocks
    flat_obs_input_layer_0 = tf.slice(ob, [0, 0], [-1, nr_rec_obs * dim_rec_obs])
    flat_obs_input_layer_1 = tf.slice(ob, [0, nr_rec_obs * dim_rec_obs], [-1, rest])
    flat_feature_input_layer = tf.slice(
        ob, [0, nr_rec_obs * dim_rec_obs + rest], [-1, ob_space.dim_local_o])
    with tf.variable_scope('vf'):
        with tf.variable_scope('input_0'):
            input_0_v = tf.layers.dense(
                flat_obs_input_layer_0, feat_size[0][0], name="fc0",
                kernel_initializer=U.normc_initializer(1.0))
        with tf.variable_scope('input_1'):
            input_1_v = tf.layers.dense(
                flat_obs_input_layer_1, feat_size[1][0], name="fc0",
                kernel_initializer=U.normc_initializer(1.0))
        last_out = tf.concat(
            [input_0_v, input_1_v, flat_feature_input_layer], axis=1)
        for i in range(num_hid_layers):
            last_out = tf.layers.dense(
                last_out, hid_size[i], name="fc%i" % (i + 1),
                kernel_initializer=U.normc_initializer(1.0))
            if self.layer_norm:
                last_out = tfc.layers.layer_norm(last_out)
            last_out = tf.nn.relu(last_out)
        # scalar state-value prediction, [:, 0] drops the trailing unit axis
        self.vpred = tf.layers.dense(
            last_out, 1, name='final',
            kernel_initializer=U.normc_initializer(1.0))[:, 0]
    with tf.variable_scope('pol'):
        # separate input projections (own weights) for the policy head
        with tf.variable_scope('input_0'):
            input_0_pi = tf.layers.dense(
                flat_obs_input_layer_0, feat_size[0][0], name="fc0",
                kernel_initializer=U.normc_initializer(1.0))
        with tf.variable_scope('input_1'):
            input_1_pi = tf.layers.dense(
                flat_obs_input_layer_1, feat_size[1][0], name="fc0",
                kernel_initializer=U.normc_initializer(1.0))
        last_out = tf.concat(
            [input_0_pi, input_1_pi, flat_feature_input_layer], axis=1)
        for i in range(num_hid_layers):
            last_out = tf.layers.dense(
                last_out, hid_size[i], name="fc%i" % (i + 1),
                kernel_initializer=U.normc_initializer(1.0))
            if self.layer_norm:
                last_out = tfc.layers.layer_norm(last_out)
            last_out = tf.nn.relu(last_out)
        if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
            # state-dependent mean, state-independent (trainable) logstd
            mean = tf.layers.dense(
                last_out, pdtype.param_shape()[0] // 2, name='final',
                kernel_initializer=U.normc_initializer(0.01))
            logstd = tf.get_variable(
                name="logstd", shape=[1, pdtype.param_shape()[0] // 2],
                initializer=tf.zeros_initializer())
            # mean * 0.0 broadcasts logstd to the batch dimension
            pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
        else:
            pdparam = tf.layers.dense(
                last_out, pdtype.param_shape()[0], name='final',
                kernel_initializer=U.normc_initializer(0.01))
    self.pd = pdtype.pdfromflat(pdparam)
    # no recurrent state in this policy
    self.state_in = []
    self.state_out = []
    stochastic = tf.placeholder(dtype=tf.bool, shape=())
    # sample during training, act deterministically (mode) otherwise
    ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
    self._act = U.function([stochastic, ob], [ac, self.vpred])
def _init(self, ob_space, ac_space, hid_size, feat_size, gaussian_fixed_var=True):
    """Build value ('vf') and policy ('pol') networks with TWO mean embeddings
    per head, one for each variable-length block of the observation.

    The flattened row is [block 0 | block 1 | local block]; each of the first
    two blocks is summarized by its own me.MeanEmbedding, the outputs are
    concatenated with the raw local features and fed through len(hid_size)
    ReLU layers per head.

    Args:
        ob_space: gym.spaces.Box; also carries `dim_mean_embs` (a pair of
            [count, width] entries) and `dim_flat_o` (local-block width).
        ac_space: action space; a Box enables the fixed-variance Gaussian head.
        hid_size: sequence of hidden-layer widths (shared by both heads).
        feat_size: pair of feature sizes, one per MeanEmbedding.
        gaussian_fixed_var: if True (and Box actions), the policy outputs a
            mean network plus one state-independent `logstd` variable.
    """
    num_hid_layers = len(hid_size)
    # NOTE: removed dead local `n_mean_embs = len(ob_space.dim_mean_embs)`
    mean_emb_0 = ob_space.dim_mean_embs[0]
    mean_emb_1 = ob_space.dim_mean_embs[1]
    nr_obs_0 = mean_emb_0[0]   # count / width of the first embedded block
    dim_obs_0 = mean_emb_0[1]
    nr_obs_1 = mean_emb_1[0]   # count / width of the second embedded block
    dim_obs_1 = mean_emb_1[1]
    dim_flat_obs = ob_space.dim_flat_o  # width of the trailing local block
    assert isinstance(ob_space, gym.spaces.Box)
    self.pdtype = pdtype = make_pdtype(ac_space)
    # batch dimension is None: same placeholder for training and inference
    ob = U.get_placeholder(name="ob", dtype=tf.float32,
                           shape=(None, ) + ob_space.shape)
    # split the flattened row into its three consecutive blocks
    mean_emb_0_input_layer = tf.slice(ob, [0, 0], [-1, nr_obs_0 * dim_obs_0])
    mean_emb_1_input_layer = tf.slice(ob, [0, nr_obs_0 * dim_obs_0],
                                      [-1, nr_obs_1 * dim_obs_1])
    flat_feature_input_layer = tf.slice(
        ob, [0, nr_obs_0 * dim_obs_0 + nr_obs_1 * dim_obs_1], [-1, dim_flat_obs])
    with tf.variable_scope('vf'):
        with tf.variable_scope('me_rec'):
            me_v_rec = me.MeanEmbedding(mean_emb_0_input_layer, feat_size[0],
                                        nr_obs_0, dim_obs_0)
        with tf.variable_scope('me_local'):
            me_v_local = me.MeanEmbedding(mean_emb_1_input_layer, feat_size[1],
                                          nr_obs_1, dim_obs_1)
        last_out = tf.concat(
            [me_v_rec.me_out, me_v_local.me_out, flat_feature_input_layer], axis=1)
        for i in range(num_hid_layers):
            last_out = tf.layers.dense(
                last_out, hid_size[i], name="fc%i" % (i + 1),
                kernel_initializer=U.normc_initializer(1.0))
            if self.layer_norm:
                last_out = tfc.layers.layer_norm(last_out)
            last_out = tf.nn.relu(last_out)
        # scalar state-value prediction, [:, 0] drops the trailing unit axis
        self.vpred = tf.layers.dense(
            last_out, 1, name='final',
            kernel_initializer=U.normc_initializer(1.0))[:, 0]
    with tf.variable_scope('pol'):
        # separate embeddings (own weights) for the policy head
        with tf.variable_scope('me_rec'):
            me_pi_rec = me.MeanEmbedding(mean_emb_0_input_layer, feat_size[0],
                                         nr_obs_0, dim_obs_0)
        with tf.variable_scope('me_local'):
            me_pi_local = me.MeanEmbedding(mean_emb_1_input_layer, feat_size[1],
                                           nr_obs_1, dim_obs_1)
        last_out = tf.concat([
            me_pi_rec.me_out, me_pi_local.me_out, flat_feature_input_layer
        ], axis=1)
        for i in range(num_hid_layers):
            last_out = tf.layers.dense(
                last_out, hid_size[i], name="fc%i" % (i + 1),
                kernel_initializer=U.normc_initializer(1.0))
            if self.layer_norm:
                last_out = tfc.layers.layer_norm(last_out)
            last_out = tf.nn.relu(last_out)
        if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
            # state-dependent mean, state-independent (trainable) logstd
            mean = tf.layers.dense(
                last_out, pdtype.param_shape()[0] // 2, name='final',
                kernel_initializer=U.normc_initializer(0.01))
            logstd = tf.get_variable(
                name="logstd", shape=[1, pdtype.param_shape()[0] // 2],
                initializer=tf.zeros_initializer())
            # mean * 0.0 broadcasts logstd to the batch dimension
            pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
        else:
            pdparam = tf.layers.dense(
                last_out, pdtype.param_shape()[0], name='final',
                kernel_initializer=U.normc_initializer(0.01))
    self.pd = pdtype.pdfromflat(pdparam)
    # no recurrent state in this policy
    self.state_in = []
    self.state_out = []
    stochastic = tf.placeholder(dtype=tf.bool, shape=())
    # sample during training, act deterministically (mode) otherwise
    ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
    self._act = U.function([stochastic, ob], [ac, self.vpred])
    # BUG FIX: the original passed the `me` *module* as a graph output
    # (`U.function([ob], [me])`), which is not a tensor and cannot be fetched.
    # Expose the policy-side embedding outputs instead, mirroring the
    # `_me_pi` accessor of the single-embedding variant in this file.
    # NOTE(review): if callers also need the value-side embeddings, add
    # me_v_rec.me_out / me_v_local.me_out here — confirm against call sites.
    self._me = U.function([ob], [me_pi_rec.me_out, me_pi_local.me_out])
def _init(self, ob_space, ac_space, hid_size, gaussian_fixed_var=True):
    """Build a plain MLP actor-critic: a value head ('vf') and a policy head
    ('pol'), each a stack of len(hid_size) ReLU layers over the raw observation.

    Args:
        ob_space: gym.spaces.Box observation space.
        ac_space: action space; a Box enables the fixed-variance Gaussian head.
        hid_size: sequence of hidden-layer widths (shared by both heads).
        gaussian_fixed_var: if True (and Box actions), the policy outputs a
            mean network plus one state-independent `logstd` variable.
    """
    assert isinstance(ob_space, gym.spaces.Box)
    self.pdtype = pdtype = make_pdtype(ac_space)
    # leading None = batch/sequence dimension
    ob = U.get_placeholder(name="ob", dtype=tf.float32,
                           shape=[None] + list(ob_space.shape))
    with tf.variable_scope("obfilter"):
        self.ob_rms = RunningMeanStd(shape=ob_space.shape)
    # with tf.variable_scope("retfilter"):
    #     self.ret_rms = RunningMeanStd(shape=1)
    with tf.variable_scope('vf'):
        # normalized/clipped observation; built but intentionally left unused
        # below (the raw `ob` is fed in), matching the original behavior
        obz = tf.clip_by_value((ob - self.ob_rms.mean) / self.ob_rms.std,
                               -5.0, 5.0)
        h = ob
        for layer_idx, width in enumerate(hid_size, start=1):
            h = tf.layers.dense(h, width, name="fc%i" % layer_idx,
                                kernel_initializer=U.normc_initializer(1.0))
            if self.layer_norm:
                # NOTE(review): this variant uses `tc.layers.layer_norm` while
                # the other builders in this file use `tfc` — confirm imports
                h = tc.layers.layer_norm(h, center=True, scale=True)
            h = tf.nn.relu(h)
        # scalar state-value prediction, [:, 0] drops the trailing unit axis
        self.vpred = tf.layers.dense(
            h, 1, name='final',
            kernel_initializer=U.normc_initializer(1.0))[:, 0]
    with tf.variable_scope('pol'):
        h = ob
        for layer_idx, width in enumerate(hid_size, start=1):
            h = tf.layers.dense(h, width, name='fc%i' % layer_idx,
                                kernel_initializer=U.normc_initializer(1.0))
            if self.layer_norm:
                h = tc.layers.layer_norm(h, center=True, scale=True)
            h = tf.nn.relu(h)
        if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
            # state-dependent mean plus one trainable, state-independent logstd
            mu = tf.layers.dense(
                h, pdtype.param_shape()[0] // 2, name='final',
                kernel_initializer=U.normc_initializer(0.01))
            logstd = tf.get_variable(
                name="logstd", shape=[1, pdtype.param_shape()[0] // 2],
                initializer=tf.zeros_initializer())
            # mu * 0.0 broadcasts logstd across the batch
            pd_flat = tf.concat([mu, mu * 0.0 + logstd], axis=1)
        else:
            pd_flat = tf.layers.dense(
                h, pdtype.param_shape()[0], name='final',
                kernel_initializer=U.normc_initializer(0.01))
    self.pd = pdtype.pdfromflat(pd_flat)
    # this policy keeps no recurrent state
    self.state_in = []
    self.state_out = []
    stochastic = tf.placeholder(dtype=tf.bool, shape=())
    # stochastic=True -> sample the distribution; False -> deterministic mode
    ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
    self._act = U.function([stochastic, ob], [ac, self.vpred])