# Shared imports assumed by the method snippets below (these are excerpts from
# larger policy classes).  The paths for `U`, `make_pdtype` and RunningMeanStd
# follow the OpenAI baselines layout; both `tc` and `tfc` alias tf.contrib, since
# both names are used below; the mean-embedding module `me` is project-specific,
# so its import here is only a placeholder.
import gym
import tensorflow as tf
import tensorflow.contrib as tc
import tensorflow.contrib as tfc
import baselines.common.tf_util as U
from baselines.common.distributions import make_pdtype
from baselines.common.mpi_running_mean_std import RunningMeanStd
import mean_embedding as me  # placeholder: the module providing MeanEmbedding is not shown


# Example 1

    def _init(self, ob_space, ac_space, hid_size, feat_size, gaussian_fixed_var=True):

        num_hid_layers = len(hid_size)
        mean_emb = ob_space.dim_mean_embs
        nr_rec_obs = mean_emb[0]  # each agent receives n_agents - 1 observations...
        dim_rec_obs = mean_emb[1]  # ... each of size dim_rec_obs ...
        dim_flat_obs = ob_space.dim_flat_o  # ... plus a local observation

        assert isinstance(ob_space, gym.spaces.Box)

        self.pdtype = pdtype = make_pdtype(ac_space)
        # Each row of ob holds one agent's flattened observation.  The first dimension is None so the same
        # placeholder serves both training and inference, i.e. shape [None, (n_agents - 1) * dim_rec_obs + dim_flat_obs].
        ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=(None,) + ob_space.shape)

        flat_obs_input_layer = tf.slice(ob, [0, 0], [-1, nr_rec_obs * dim_rec_obs])  # grab only the part that goes into mean embedding
        flat_feature_input_layer = tf.slice(ob, [0, nr_rec_obs * dim_rec_obs], [-1, dim_flat_obs])  # grab only the local observation

        with tf.variable_scope('vf'):
            with tf.variable_scope('me'):
                me_v = me.MeanEmbedding(flat_obs_input_layer, feat_size, nr_rec_obs, dim_rec_obs)
            last_out = tf.concat([me_v.me_out, flat_feature_input_layer], axis=1)
            for i in range(num_hid_layers):
                last_out = tf.layers.dense(last_out, hid_size[i], name="fc%i" % (i + 1),
                                           kernel_initializer=U.normc_initializer(1.0))
                if self.layer_norm:
                    last_out = tfc.layers.layer_norm(last_out)
                last_out = tf.nn.relu(last_out)

            self.vpred = tf.layers.dense(last_out, 1, name='final', kernel_initializer=U.normc_initializer(1.0))[:,0]

        with tf.variable_scope('pol'):
            with tf.variable_scope('me'):
                me_pi = me.MeanEmbedding(flat_obs_input_layer, feat_size, nr_rec_obs, dim_rec_obs)
            last_out = tf.concat([me_pi.me_out, flat_feature_input_layer], axis=1)
            for i in range(num_hid_layers):
                last_out = tf.layers.dense(last_out, hid_size[i], name="fc%i" % (i + 1),
                                           kernel_initializer=U.normc_initializer(1.0))
                if self.layer_norm:
                    last_out = tfc.layers.layer_norm(last_out)
                last_out = tf.nn.relu(last_out)

            if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
                mean = tf.layers.dense(last_out, pdtype.param_shape()[0]//2, name='final', kernel_initializer=U.normc_initializer(0.01))
                logstd = tf.get_variable(name="logstd", shape=[1, pdtype.param_shape()[0]//2], initializer=tf.zeros_initializer())
                pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
            else:
                pdparam = tf.layers.dense(last_out, pdtype.param_shape()[0], name='final', kernel_initializer=U.normc_initializer(0.01))

        self.pd = pdtype.pdfromflat(pdparam)

        self.state_in = []
        self.state_out = []

        stochastic = tf.placeholder(dtype=tf.bool, shape=())
        ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
        self._act = U.function([stochastic, ob], [ac, self.vpred])
        self._me_v = U.function([ob], [me_v.me_out])
        self._me_pi = U.function([ob], [me_pi.me_out])
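

# `me.MeanEmbedding` is not defined in this listing.  Below is a minimal sketch of
# such a module, assuming each of the `nr_obs` received observations is embedded by
# a shared dense network of widths `feat_size` and the per-neighbour features are
# averaged.  Only the constructor signature and the `me_out` attribute are taken
# from the calls above; the rest is an assumption about the implementation.
class MeanEmbeddingSketch(object):
    def __init__(self, input_layer, feat_size, nr_obs, dim_obs):
        # [batch, nr_obs * dim_obs] -> [batch, nr_obs, dim_obs]
        stacked = tf.reshape(input_layer, [-1, nr_obs, dim_obs])
        last = stacked
        for j, width in enumerate(feat_size):
            # shared embedding network applied to every received observation
            last = tf.layers.dense(last, width, activation=tf.nn.relu,
                                   name="me_fc%i" % j,
                                   kernel_initializer=U.normc_initializer(1.0))
        # mean over the received observations -> [batch, feat_size[-1]]
        self.me_out = tf.reduce_mean(last, axis=1)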


# Example 2

    def _init(self,
              ob_space,
              ac_space,
              hid_size,
              feat_size,
              gaussian_fixed_var=True):

        num_hid_layers = len(hid_size)
        neighbor_info = ob_space.dim_rec_o
        nr_rec_obs = neighbor_info[0]   # number of received observations ...
        dim_rec_obs = neighbor_info[1]  # ... each of size dim_rec_obs
        rest = ob_space.dim_flat_o - ob_space.dim_local_o  # flat features that are not part of the local observation
        dim_flat_obs = ob_space.dim_flat_o

        assert isinstance(ob_space, gym.spaces.Box)

        self.pdtype = pdtype = make_pdtype(ac_space)

        ob = U.get_placeholder(name="ob",
                               dtype=tf.float32,
                               shape=(None, ) + ob_space.shape)

        flat_obs_input_layer_0 = tf.slice(ob, [0, 0],
                                          [-1, nr_rec_obs * dim_rec_obs])
        flat_obs_input_layer_1 = tf.slice(ob, [0, nr_rec_obs * dim_rec_obs],
                                          [-1, rest])
        flat_feature_input_layer = tf.slice(
            ob, [0, nr_rec_obs * dim_rec_obs + rest],
            [-1, ob_space.dim_local_o])

        with tf.variable_scope('vf'):
            with tf.variable_scope('input_0'):
                input_0_v = tf.layers.dense(
                    flat_obs_input_layer_0,
                    feat_size[0][0],
                    name="fc0",
                    kernel_initializer=U.normc_initializer(1.0))
            with tf.variable_scope('input_1'):
                input_1_v = tf.layers.dense(
                    flat_obs_input_layer_1,
                    feat_size[1][0],
                    name="fc0",
                    kernel_initializer=U.normc_initializer(1.0))
            last_out = tf.concat(
                [input_0_v, input_1_v, flat_feature_input_layer], axis=1)
            for i in range(num_hid_layers):
                last_out = tf.layers.dense(
                    last_out,
                    hid_size[i],
                    name="fc%i" % (i + 1),
                    kernel_initializer=U.normc_initializer(1.0))
                if self.layer_norm:
                    last_out = tfc.layers.layer_norm(last_out)
                last_out = tf.nn.relu(last_out)

            self.vpred = tf.layers.dense(
                last_out,
                1,
                name='final',
                kernel_initializer=U.normc_initializer(1.0))[:, 0]

        with tf.variable_scope('pol'):
            with tf.variable_scope('input_0'):
                input_0_pi = tf.layers.dense(
                    flat_obs_input_layer_0,
                    feat_size[0][0],
                    name="fc0",
                    kernel_initializer=U.normc_initializer(1.0))
            with tf.variable_scope('input_1'):
                input_1_pi = tf.layers.dense(
                    flat_obs_input_layer_1,
                    feat_size[1][0],
                    name="fc0",
                    kernel_initializer=U.normc_initializer(1.0))
            last_out = tf.concat(
                [input_0_pi, input_1_pi, flat_feature_input_layer], axis=1)
            for i in range(num_hid_layers):
                last_out = tf.layers.dense(
                    last_out,
                    hid_size[i],
                    name="fc%i" % (i + 1),
                    kernel_initializer=U.normc_initializer(1.0))
                if self.layer_norm:
                    last_out = tfc.layers.layer_norm(last_out)
                last_out = tf.nn.relu(last_out)

            if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
                mean = tf.layers.dense(
                    last_out,
                    pdtype.param_shape()[0] // 2,
                    name='final',
                    kernel_initializer=U.normc_initializer(0.01))
                logstd = tf.get_variable(
                    name="logstd",
                    shape=[1, pdtype.param_shape()[0] // 2],
                    initializer=tf.zeros_initializer())
                pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
            else:
                pdparam = tf.layers.dense(
                    last_out,
                    pdtype.param_shape()[0],
                    name='final',
                    kernel_initializer=U.normc_initializer(0.01))

        self.pd = pdtype.pdfromflat(pdparam)

        self.state_in = []
        self.state_out = []

        stochastic = tf.placeholder(dtype=tf.bool, shape=())
        ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
        self._act = U.function([stochastic, ob], [ac, self.vpred])
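
    # Sketch (not part of the listing above): in baselines-style policies the compiled
    # `_act` function is usually exposed through a small wrapper like this one, which
    # adds the batch dimension on the way in and strips it on the way out.
    def act(self, stochastic, ob):
        ac1, vpred1 = self._act(stochastic, ob[None])
        return ac1[0], vpred1[0]


# Example 3
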
    def _init(self,
              ob_space,
              ac_space,
              hid_size,
              feat_size,
              gaussian_fixed_var=True):

        num_hid_layers = len(hid_size)
        n_mean_embs = len(ob_space.dim_mean_embs)
        mean_emb_0 = ob_space.dim_mean_embs[0]
        mean_emb_1 = ob_space.dim_mean_embs[1]
        nr_obs_0 = mean_emb_0[0]
        dim_obs_0 = mean_emb_0[1]

        nr_obs_1 = mean_emb_1[0]
        dim_obs_1 = mean_emb_1[1]

        dim_flat_obs = ob_space.dim_flat_o

        assert isinstance(ob_space, gym.spaces.Box)

        self.pdtype = pdtype = make_pdtype(ac_space)

        ob = U.get_placeholder(name="ob",
                               dtype=tf.float32,
                               shape=(None, ) + ob_space.shape)

        mean_emb_0_input_layer = tf.slice(ob, [0, 0],
                                          [-1, nr_obs_0 * dim_obs_0])
        mean_emb_1_input_layer = tf.slice(ob, [0, nr_obs_0 * dim_obs_0],
                                          [-1, nr_obs_1 * dim_obs_1])
        flat_feature_input_layer = tf.slice(
            ob, [0, nr_obs_0 * dim_obs_0 + nr_obs_1 * dim_obs_1],
            [-1, dim_flat_obs])

        with tf.variable_scope('vf'):
            with tf.variable_scope('me_rec'):
                me_v_rec = me.MeanEmbedding(mean_emb_0_input_layer,
                                            feat_size[0], nr_obs_0, dim_obs_0)
            with tf.variable_scope('me_local'):
                me_v_local = me.MeanEmbedding(mean_emb_1_input_layer,
                                              feat_size[1], nr_obs_1,
                                              dim_obs_1)
            last_out = tf.concat(
                [me_v_rec.me_out, me_v_local.me_out, flat_feature_input_layer],
                axis=1)
            for i in range(num_hid_layers):
                last_out = tf.layers.dense(
                    last_out,
                    hid_size[i],
                    name="fc%i" % (i + 1),
                    kernel_initializer=U.normc_initializer(1.0))
                if self.layer_norm:
                    last_out = tfc.layers.layer_norm(last_out)
                last_out = tf.nn.relu(last_out)

            self.vpred = tf.layers.dense(
                last_out,
                1,
                name='final',
                kernel_initializer=U.normc_initializer(1.0))[:, 0]

        with tf.variable_scope('pol'):
            with tf.variable_scope('me_rec'):
                me_pi_rec = me.MeanEmbedding(mean_emb_0_input_layer,
                                             feat_size[0], nr_obs_0, dim_obs_0)
            with tf.variable_scope('me_local'):
                me_pi_local = me.MeanEmbedding(mean_emb_1_input_layer,
                                               feat_size[1], nr_obs_1,
                                               dim_obs_1)
            last_out = tf.concat([
                me_pi_rec.me_out, me_pi_local.me_out, flat_feature_input_layer
            ],
                                 axis=1)
            for i in range(num_hid_layers):
                last_out = tf.layers.dense(
                    last_out,
                    hid_size[i],
                    name="fc%i" % (i + 1),
                    kernel_initializer=U.normc_initializer(1.0))
                if self.layer_norm:
                    last_out = tfc.layers.layer_norm(last_out)
                last_out = tf.nn.relu(last_out)

            if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
                mean = tf.layers.dense(
                    last_out,
                    pdtype.param_shape()[0] // 2,
                    name='final',
                    kernel_initializer=U.normc_initializer(0.01))
                logstd = tf.get_variable(
                    name="logstd",
                    shape=[1, pdtype.param_shape()[0] // 2],
                    initializer=tf.zeros_initializer())
                pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
            else:
                pdparam = tf.layers.dense(
                    last_out,
                    pdtype.param_shape()[0],
                    name='final',
                    kernel_initializer=U.normc_initializer(0.01))

        self.pd = pdtype.pdfromflat(pdparam)

        self.state_in = []
        self.state_out = []

        stochastic = tf.placeholder(dtype=tf.bool, shape=())
        ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
        self._act = U.function([stochastic, ob], [ac, self.vpred])
        # `me` is the module, not a tensor; expose the value net's mean-embedding outputs instead
        self._me = U.function([ob], [me_v_rec.me_out, me_v_local.me_out])
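

# Illustration (not part of the listing): the flattened observation consumed by the
# example above is laid out as
#   [nr_obs_0 * dim_obs_0 | nr_obs_1 * dim_obs_1 | dim_flat_obs]
# which is exactly what the three tf.slice calls at the top pick apart.  The sizes
# below are made-up assumptions, used only to show the layout.
import numpy as np

nr_obs_0, dim_obs_0 = 3, 4    # first mean-embedding group: 3 observations of size 4
nr_obs_1, dim_obs_1 = 2, 5    # second mean-embedding group: 2 observations of size 5
dim_flat_obs = 6              # remaining flat / local features
ob_example = np.concatenate([
    np.zeros(nr_obs_0 * dim_obs_0),   # -> mean_emb_0_input_layer
    np.zeros(nr_obs_1 * dim_obs_1),   # -> mean_emb_1_input_layer
    np.zeros(dim_flat_obs),           # -> flat_feature_input_layer
])
assert ob_example.shape == (nr_obs_0 * dim_obs_0 + nr_obs_1 * dim_obs_1 + dim_flat_obs,)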


# Example 4

    def _init(self, ob_space, ac_space, hid_size, gaussian_fixed_var=True):

        num_hid_layers = len(hid_size)

        assert isinstance(ob_space, gym.spaces.Box)

        self.pdtype = pdtype = make_pdtype(ac_space)
        sequence_length = None

        ob = U.get_placeholder(name="ob",
                               dtype=tf.float32,
                               shape=[sequence_length] + list(ob_space.shape))

        with tf.variable_scope("obfilter"):
            self.ob_rms = RunningMeanStd(shape=ob_space.shape)

        # with tf.variable_scope("retfilter"):
        #     self.ret_rms = RunningMeanStd(shape=1)

        with tf.variable_scope('vf'):
            obz = tf.clip_by_value((ob - self.ob_rms.mean) / self.ob_rms.std,
                                   -5.0, 5.0)
            # obz (the clipped, normalised observation) is computed above but not used; the raw observation is fed in instead
            # last_out = obz
            last_out = ob
            for i in range(num_hid_layers):
                last_out = tf.layers.dense(
                    last_out,
                    hid_size[i],
                    name="fc%i" % (i + 1),
                    kernel_initializer=U.normc_initializer(1.0))
                if self.layer_norm:
                    last_out = tc.layers.layer_norm(last_out,
                                                    center=True,
                                                    scale=True)
                last_out = tf.nn.relu(last_out)
            self.vpred = tf.layers.dense(
                last_out,
                1,
                name='final',
                kernel_initializer=U.normc_initializer(1.0))[:, 0]

        with tf.variable_scope('pol'):
            # last_out = obz
            last_out = ob
            for i in range(num_hid_layers):
                last_out = tf.layers.dense(
                    last_out,
                    hid_size[i],
                    name='fc%i' % (i + 1),
                    kernel_initializer=U.normc_initializer(1.0))
                if self.layer_norm:
                    last_out = tc.layers.layer_norm(last_out,
                                                    center=True,
                                                    scale=True)
                last_out = tf.nn.relu(last_out)
            if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
                mean = tf.layers.dense(
                    last_out,
                    pdtype.param_shape()[0] // 2,
                    name='final',
                    kernel_initializer=U.normc_initializer(0.01))
                logstd = tf.get_variable(
                    name="logstd",
                    shape=[1, pdtype.param_shape()[0] // 2],
                    initializer=tf.zeros_initializer())
                pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
            else:
                pdparam = tf.layers.dense(
                    last_out,
                    pdtype.param_shape()[0],
                    name='final',
                    kernel_initializer=U.normc_initializer(0.01))

        self.pd = pdtype.pdfromflat(pdparam)

        self.state_in = []
        self.state_out = []

        stochastic = tf.placeholder(dtype=tf.bool, shape=())
        ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
        self._act = U.function([stochastic, ob], [ac, self.vpred])
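

# For a Box action space, make_pdtype gives a diagonal Gaussian whose flat parameter
# vector is [mean, logstd], so pdtype.param_shape()[0] is twice the action dimension
# and the `// 2` in the policy heads above recovers it.  The expression
# `mean * 0.0 + logstd` simply broadcasts the single learned logstd row across the
# batch.  A tiny numpy illustration of that broadcasting (the shapes are made-up,
# not taken from the listing):
import numpy as np

mean_example = np.zeros((128, 3), dtype=np.float32)    # a batch of action means
logstd_example = np.zeros((1, 3), dtype=np.float32)    # one shared logstd row
pdparam_example = np.concatenate(
    [mean_example, mean_example * 0.0 + logstd_example], axis=1)
assert pdparam_example.shape == (128, 6)               # [mean | logstd] per row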