예제 #1
0
    def build_graph(self, trajs, trajs_len, reuse=False):
        """Build the RNN + attention network over padded trajectories.

        Args:
            trajs: float tensor of trajectories; the last axis is assumed to
                be observations followed by action features, i.e.
                (batch, time, num_observations + action_dims) -- TODO confirm.
            trajs_len: per-trajectory valid lengths for the dynamic RNN.
            reuse: when True, reuse variables already created in self.scope.

        Returns:
            Tuple (logits, rewards): the shared FC layer applied to the
            attention-pooled RNN outputs and to the attention-weighted
            embeddings, respectively.
        """
        with tf.variable_scope(self.scope):
            if reuse:
                tf.get_variable_scope().reuse_variables()

            # Input normalization: standardize only the observation slice
            # with running mean/std, then re-attach the remaining (action)
            # features untouched.
            with tf.variable_scope("obfilter"):
                self.obs_rms = RunningMeanStd(shape=self.observation_shape)
            obs = (trajs[:, :, :self.num_observations] -
                   self.obs_rms.mean) / self.obs_rms.std
            feats = tf.concat((obs, trajs[:, :, self.num_observations:]), 2)
            #feats = trajs

            with tf.variable_scope("rnn"):
                cell = self._get_cell(self.hidden_size, self.cell_type, reuse)
                cell = tf.contrib.rnn.DropoutWrapper(
                    cell, output_keep_prob=self.dropout_keep_prob)
                # sequence_length ensures padded timesteps beyond trajs_len
                # do not contribute to the RNN outputs.
                outputs, _ = tf.nn.dynamic_rnn(cell=cell,
                                               inputs=feats,
                                               sequence_length=trajs_len,
                                               dtype=tf.float32)
                with tf.variable_scope('attention') as scope:
                    attn_outputs, weighted_eb = self.attention(
                        outputs, self.attention_size, scope)
                # The same FC layer is applied to both tensors; reuse=True on
                # the second call shares its weights with the first.
                logits = self.shared_fc_layer(attn_outputs, reuse=False)
                rewards = self.shared_fc_layer(weighted_eb, reuse=True)
                #check_values = (outputs, attn_outputs, weighted_eb)
        return logits, rewards  #, check_values
예제 #2
0
  def build_graph(self, obs_ph, acs_ph, reuse=False):
    """Build an MLP mapping a normalized (obs, action) pair to a scalar logit.

    Args:
      obs_ph: observation placeholder, shape (batch, obs_dim).
      acs_ph: action placeholder, shape (batch, ac_dim).
      reuse: when True, reuse variables already created under self.scope.

    Returns:
      A (batch, 1) tensor of unnormalized logits.
    """
    with tf.variable_scope(self.scope):
      if reuse:
        tf.get_variable_scope().reuse_variables()

      # Running mean/std statistics used to normalize observations.
      with tf.variable_scope("obfilter"):
          self.obs_rms = RunningMeanStd(shape=self.observation_shape)
      normalized_obs = (obs_ph - self.obs_rms.mean) / self.obs_rms.std
      # A (state, action) pair concatenated along features forms one transition.
      hidden = tf.concat([normalized_obs, acs_ph], axis=1)
      # Two tanh hidden layers of identical width (same call order as the
      # original, so TF auto-generated variable names are unchanged).
      for _ in range(2):
        hidden = tf.contrib.layers.fully_connected(
            hidden, self.hidden_size, activation_fn=tf.nn.tanh)
      logits = tf.contrib.layers.fully_connected(
          hidden, 1, activation_fn=tf.identity)
    return logits
예제 #3
0
    def _init(self, ob_space, ac_space, hid_size, num_hid_layers, gaussian_fixed_var=True):
        """Construct value and policy heads over a normalized observation input.

        Args:
            ob_space: observation space; must be a gym.spaces.Box.
            ac_space: action space used to build the probability-distribution type.
            hid_size: width of every hidden layer.
            num_hid_layers: number of hidden layers in each head.
            gaussian_fixed_var: if True and the action space is continuous,
                use a diagonal Gaussian with a learned, state-independent log-std.
        """
        assert isinstance(ob_space, gym.spaces.Box)

        self.pdtype = pdtype = make_pdtype(ac_space)

        # Leading None => variable batch dimension.
        ob = U.get_placeholder(name="ob", dtype=tf.float32,
                               shape=[None] + list(ob_space.shape))

        with tf.variable_scope("obfilter"):
            self.ob_rms = RunningMeanStd(shape=ob_space.shape)

        # Normalize with running statistics and clip to keep activations well-scaled.
        obz = tf.clip_by_value((ob - self.ob_rms.mean) / self.ob_rms.std, -5.0, 5.0)

        # Value-function head: a stack of tanh layers ending in a scalar.
        vf_hidden = obz
        for layer_idx in range(1, num_hid_layers + 1):
            vf_hidden = tf.nn.tanh(
                U.dense(vf_hidden, hid_size, "vffc%i" % layer_idx,
                        weight_init=U.normc_initializer(1.0)))
        self.vpred = U.dense(vf_hidden, 1, "vffinal",
                             weight_init=U.normc_initializer(1.0))[:, 0]

        # Policy head: a separate tanh stack over the same normalized input.
        pol_hidden = obz
        for layer_idx in range(1, num_hid_layers + 1):
            pol_hidden = tf.nn.tanh(
                U.dense(pol_hidden, hid_size, "polfc%i" % layer_idx,
                        weight_init=U.normc_initializer(1.0)))
        if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
            # Diagonal Gaussian: network outputs the mean; log-std is a free
            # variable broadcast across the batch via `mean * 0.0 + logstd`.
            mean = U.dense(pol_hidden, pdtype.param_shape()[0] // 2, "polfinal",
                           U.normc_initializer(0.01))
            logstd = tf.get_variable(name="logstd",
                                     shape=[1, pdtype.param_shape()[0] // 2],
                                     initializer=tf.zeros_initializer())
            pdparam = U.concatenate([mean, mean * 0.0 + logstd], axis=1)
        else:
            pdparam = U.dense(pol_hidden, pdtype.param_shape()[0], "polfinal",
                              U.normc_initializer(0.01))

        self.pd = pdtype.pdfromflat(pdparam)

        # This policy carries no recurrent state.
        self.state_in = []
        self.state_out = []

        # Sample when `stochastic` is True, otherwise act with the mode
        # (placeholder form needed for behavioral cloning).
        stochastic = U.get_placeholder(name="stochastic", dtype=tf.bool, shape=())
        self.ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
        self._act = U.function([stochastic, ob], [self.ac, self.vpred])
예제 #4
0
    def _init(self,
              ob_space,
              ac_space,
              hid_size,
              num_hid_layers,
              gaussian_fixed_var=True):
        """Build value and policy networks for an MLP policy.

        Args:
            ob_space: observation space; must be a gym.spaces.Box.
            ac_space: action space used to build the distribution type.
            hid_size: width of each hidden layer.
            num_hid_layers: number of hidden layers in each head.
            gaussian_fixed_var: if True (and the action space is a Box), use
                a diagonal Gaussian with a state-independent learned log-std.
        """
        assert isinstance(
            ob_space,
            gym.spaces.Box)  # bail out unless the observation space is a Box
        #print ("mlp_policy/20lines")  # debug: was executed twice
        #print ("ac_space.shape[0]", ac_space.shape[0])  # debug: printed 3
        self.pdtype = pdtype = make_pdtype(
            ac_space
        )  # e.g. returns DiagGaussianPdType(ac_space.shape[0]) for Box actions
        sequence_length = None

        ob = U.get_placeholder(
            name="ob",
            dtype=tf.float32,
            shape=[sequence_length] + list(ob_space.shape)
        )  # returns tf.placeholder(dtype=dtype, shape=shape, name=name)
        #print ("obspace.shape:::", list(ob_space.shape))  # debug: printed [11]
        with tf.variable_scope("obfilter"):
            #print("gail-tf/gailtf/baselines/ppo1/mlp_policy.py/28lines:")
            # Running mean/std statistics used to normalize observations.
            self.ob_rms = RunningMeanStd(
                shape=ob_space.shape)

        obz = tf.clip_by_value((ob - self.ob_rms.mean) / self.ob_rms.std, -5.0,
                               5.0)  # `ob` is still a placeholder at this point
        last_out = obz
        for i in range(num_hid_layers):
            last_out = tf.nn.tanh(
                U.dense(last_out,
                        hid_size,
                        "vffc%i" % (i + 1),
                        weight_init=U.normc_initializer(
                            1.0)))  # fully-connected hidden layers (value head)
        self.vpred = U.dense(
            last_out, 1, "vffinal", weight_init=U.normc_initializer(1.0)
        )[:,
          0]  # scalar value prediction; no action-sized output on this head

        last_out = obz
        for i in range(num_hid_layers):
            last_out = tf.nn.tanh(
                U.dense(last_out,
                        hid_size,
                        "polfc%i" % (i + 1),
                        weight_init=U.normc_initializer(1.0)))
        if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
            print("gaussian_fixed_var is used")
            # Diagonal Gaussian: network outputs the mean; log-std is a free
            # variable broadcast across the batch.
            mean = U.dense(last_out,
                           pdtype.param_shape()[0] // 2, "polfinal",
                           U.normc_initializer(0.01))
            logstd = tf.get_variable(name="logstd",
                                     shape=[1, pdtype.param_shape()[0] // 2],
                                     initializer=tf.zeros_initializer())
            pdparam = U.concatenate([mean, mean * 0.0 + logstd], axis=1)
        else:
            #print ("gaussian_fixed_var is not used")  # branch never hit in practice
            pdparam = U.dense(last_out,
                              pdtype.param_shape()[0], "polfinal",
                              U.normc_initializer(0.01))

        self.pd = pdtype.pdfromflat(
            pdparam
        )  # with the default pdtype above this yields a DiagGaussianPd
        # pd exposes kl, entropy, sample, and related methods.
        self.state_in = []
        self.state_out = []

        # change for BC
        #stochastic = tf.placeholder(dtype=tf.bool, shape=())
        stochastic = U.get_placeholder(name="stochastic",
                                       dtype=tf.bool,
                                       shape=())
        ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
        self.ac = ac
        self._act = U.function([stochastic, ob], [ac, self.vpred])
예제 #5
0
    def _init(self, ob_space, ac_space, hid_size, num_hid_layers, gaussian_fixed_var=True):
        """Build a conv actor-critic over a flat SC2-style observation.

        The flat observation is sliced as 5 minimap channels (64x64),
        10 screen channels (64x64), an 11-dim info vector, and a 524-dim
        available-action mask -- assumed layout, TODO confirm against the
        environment that produces `ob`.

        Args:
            ob_space: observation space; must be a gym.spaces.Box.
            ac_space: action space used to build the distribution type.
            hid_size: unused here (conv/dense sizes are hard-coded).
            num_hid_layers: unused here for the same reason.
            gaussian_fixed_var: if True and the action space is a Box, use a
                diagonal Gaussian with a learned state-independent log-std.
        """
        assert isinstance(ob_space, gym.spaces.Box)

        self.pdtype = pdtype = make_pdtype(ac_space)
        sequence_length = None

        ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=[sequence_length] + list(ob_space.shape))

        # One-hot encoding of the previous action, fed to an LSTM branch below.
        last_action = U.get_placeholder(shape=(None, 524), dtype=tf.float32, name="last_action_one_hot")
        self.msize = 64 # minimap resolution
        self.ssize = 64 
        self.isize = 11
        self.available_action_size = 524

        # Available-action mask sliced from the *raw* (unnormalized) observation.
        available_action = ob[:, (5*self.msize*self.msize+10*self.ssize*self.ssize+self.isize):(5*self.msize*self.msize+10*self.ssize*self.ssize+self.isize+self.available_action_size)]
        # ob = ob[:,:-(self.available_action_size)]

        with tf.variable_scope("obfilter"):
            self.ob_rms = RunningMeanStd(shape=ob_space.shape)

        # obz = tf.clip_by_value((ob - self.ob_rms.mean) / self.ob_rms.std, -20.0, 20.0)
        obz = (ob - self.ob_rms.mean) / self.ob_rms.std  # normalized, unclipped

        # Split the normalized observation into its components.
        minimap = obz[:, 0:5*self.msize*self.msize]
        # minimap /= 2
        screen = obz[:, 5*self.msize*self.msize: 5*self.msize*self.msize+ 10*self.ssize*self.ssize]
        # screen /= 2
        info = obz[:, (5*self.msize*self.msize+10*self.ssize*self.ssize):(5*self.msize*self.msize+10*self.ssize*self.ssize+self.isize)]
        # info /= 2


        # get value prediction, critic
        # Minimap branch: two conv + max-pool stages, 64x64x5 -> 16x16x64.
        mconv1 = tf.layers.conv2d(
            inputs=tf.reshape(minimap, [-1,self.msize,self.msize,5]),
            filters=32,
            kernel_size=[5, 5],
            padding="same",
            kernel_initializer=U.normc_initializer(0.01),
            activation=tf.nn.leaky_relu)
        mpool1 = tf.layers.max_pooling2d(inputs=mconv1, pool_size=[2, 2], strides=2)
        mconv2 = tf.layers.conv2d(
            inputs=mpool1,
            filters=64,
            kernel_size=[5, 5],
            padding="same",
            kernel_initializer=U.normc_initializer(0.01),
            activation=tf.nn.leaky_relu,
            name="vffcmconv2")
        mpool2 = tf.layers.max_pooling2d(inputs=mconv2, pool_size=[2, 2], strides=2)
        mpool2_flat = tf.reshape(mpool2, [-1, 16 * 16 * 64])

        # Screen branch: same structure, 64x64x10 -> 16x16x80.
        sconv1 = tf.layers.conv2d(
            inputs=tf.reshape(screen, [-1,self.ssize, self.ssize,10]),
            filters=48,
            kernel_size=[5, 5],
            padding="same",
            kernel_initializer=U.normc_initializer(0.01),
            activation=tf.nn.leaky_relu)
        spool1 = tf.layers.max_pooling2d(inputs=sconv1, pool_size=[2, 2], strides=2)
        sconv2 = tf.layers.conv2d(
            inputs=spool1,
            filters=80,
            kernel_size=[5, 5],
            padding="same",
            kernel_initializer=U.normc_initializer(0.01),
            activation=tf.nn.leaky_relu)
        spool2 = tf.layers.max_pooling2d(inputs=sconv2, pool_size=[2, 2], strides=2)
        spool2_flat = tf.reshape(spool2, [-1, 16 * 16 * 80])

        # Small dense embeddings for the info vector and the action mask.
        info_fc = tf.layers.dense(inputs=layers.flatten(info),
                   units=8,
                   activation=tf.tanh)
        
        aa_fc = tf.layers.dense(inputs=layers.flatten(available_action),
                   units=32,
                   activation=tf.tanh)

        # Last-action branch: dense -> reshape to a 16-step sequence -> LSTM.
        HIDDEN_SIZE = 128
        l1_action = tf.layers.dense(layers.flatten(last_action), 256, tf.nn.relu)
        input_to_rnn = tf.reshape(l1_action, [-1, 16, 16])
        action_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=HIDDEN_SIZE, 
            forget_bias=1.0, state_is_tuple=True)
        inputs_rnn = tf.unstack(input_to_rnn, num=16, axis=1)
        rnn_outputs,rnn_state= tf.contrib.rnn.static_rnn(action_lstm_cell,
            inputs_rnn, dtype=tf.float32)
        # rnn_state[-1] is the final hidden state of the (c, h) state tuple.
        l2_action = tf.layers.dense(rnn_state[-1], 
            128, tf.nn.tanh)          # hidden layer
        last_acs_ph_lstm = tf.layers.dense(l2_action, 
            32, tf.nn.tanh)

        # Fuse all branches into a single feature vector.
        last_out = tf.concat([mpool2_flat, spool2_flat, info_fc, aa_fc, last_acs_ph_lstm], 
            axis=1)
        vf_last_out = tf.nn.tanh(U.dense(last_out, 1024, 'vf_last_out',
            weight_init=U.normc_initializer(1.0)))
        # vf_last_out_2 = tf.nn.tanh(U.dense(vf_last_out, 64, 'vf_last_out_2',
        #     weight_init=U.normc_initializer(1.0)))
        self.vpred = U.dense(vf_last_out, 1, "vffinal", weight_init=U.normc_initializer(1.0))[:,0]

        if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
            # Diagonal Gaussian head with state-independent learned log-std.
            mean = U.dense(last_out, pdtype.param_shape()[0]//2, "polfinal", U.normc_initializer(0.01))
            logstd = tf.get_variable(name="logstd", shape=[1, pdtype.param_shape()[0]//2], initializer=tf.zeros_initializer())
            pdparam = U.concatenate([mean, mean * 0.0 + logstd], axis=1)
        else:
            pol_last_out = U.dense(last_out, (pdtype.param_shape()[0])*5, "polfinaldense", U.normc_initializer(0.01))
            pdparam = U.dense(pol_last_out, pdtype.param_shape()[0], "polfinal", U.normc_initializer(0.01))

        self.pd = pdtype.pdfromflat(pdparam)

        # No recurrent policy state (the LSTM above is feed-forward per step).
        self.state_in = []
        self.state_out = []

        # change for BC
        #stochastic = tf.placeholder(dtype=tf.bool, shape=())
        stochastic = U.get_placeholder(name="stochastic", dtype=tf.bool, shape=())
        # NOTE(review): sample/mode take the availability mask here, unlike the
        # other policies in this file -- presumably masking invalid actions.
        ac = U.switch(stochastic, self.pd.sample(available_action), self.pd.mode(available_action))
        self.ac = ac
        self._act = U.function([stochastic, ob, last_action], [ac, self.vpred])
예제 #6
0
    def _init(self,
              ob_space,
              ac_space,
              hid_size,
              num_hid_layers,
              gaussian_fixed_var=True):
        """Build a policy with a dense value head and a conv policy head.

        The flat observation is sliced as 5 minimap channels (64x64),
        10 screen channels (64x64), an 11-dim info vector, and a 524-dim
        available-action mask -- assumed layout, TODO confirm.

        Args:
            ob_space: observation space; must be a gym.spaces.Box.
            ac_space: action space used to build the distribution type.
            hid_size: width of each value-head hidden layer.
            num_hid_layers: number of value-head hidden layers.
            gaussian_fixed_var: if True and the action space is a Box, use a
                diagonal Gaussian with a learned state-independent log-std.
        """
        assert isinstance(ob_space, gym.spaces.Box)

        self.pdtype = pdtype = make_pdtype(ac_space)
        sequence_length = None

        ob = U.get_placeholder(name="ob",
                               dtype=tf.float32,
                               shape=[sequence_length] + list(ob_space.shape))

        with tf.variable_scope("obfilter"):
            self.ob_rms = RunningMeanStd(shape=ob_space.shape)

        # Normalize and clip; note the wider +/-20 clip range used here.
        obz = tf.clip_by_value((ob - self.ob_rms.mean) / self.ob_rms.std,
                               -20.0, 20.0)
        # Value head: plain tanh MLP over the full normalized observation.
        last_out = obz
        for i in range(num_hid_layers):
            last_out = tf.nn.tanh(
                U.dense(last_out,
                        hid_size,
                        "vffc%i" % (i + 1),
                        weight_init=U.normc_initializer(1.0)))
        self.vpred = U.dense(last_out,
                             1,
                             "vffinal",
                             weight_init=U.normc_initializer(1.0))[:, 0]

        # last_out = obz
        # for i in range(num_hid_layers):
        #     last_out = tf.nn.tanh(U.dense(last_out, hid_size, "polfc%i"%(i+1), weight_init=U.normc_initializer(1.0)))
        ### add conv net instead of using dense
        self.msize = 64  # minimap resolution
        self.ssize = 64
        self.isize = 11
        self.available_action_size = 524
        # Slice the normalized observation into its components.
        minimap = obz[:, 0:5 * self.msize * self.msize]
        screen = obz[:,
                     5 * self.msize * self.msize:5 * self.msize * self.msize +
                     10 * self.ssize * self.ssize]
        info = obz[:, (5 * self.msize * self.msize +
                       10 * self.ssize * self.ssize):(
                           5 * self.msize * self.msize +
                           10 * self.ssize * self.ssize + self.isize)]
        # NOTE(review): here the action mask is sliced from the *normalized*
        # obz, unlike sibling policies that slice it from the raw ob -- verify.
        available_action = obz[:, (5 * self.msize * self.msize +
                                   10 * self.ssize * self.ssize +
                                   self.isize):(5 * self.msize * self.msize +
                                                10 * self.ssize * self.ssize +
                                                self.isize +
                                                self.available_action_size)]

        # Minimap branch: conv/pool stages, 64x64x5 -> 8x8x10, then flattened.
        conv1_minimap = tf.layers.conv2d(inputs=tf.reshape(
            minimap, [-1, self.msize, self.msize, 5]),
                                         filters=10,
                                         kernel_size=5,
                                         strides=1,
                                         padding='same',
                                         activation=tf.nn.leaky_relu,
                                         name="polmconv1")  # -> (64, 64, 10)
        pool1_minimap = tf.layers.max_pooling2d(
            conv1_minimap, pool_size=4, strides=4,
            name="polmpool1")  # -> (16, 16, 10)
        conv2_minimap = tf.layers.conv2d(pool1_minimap,
                                         10,
                                         5,
                                         1,
                                         'same',
                                         activation=tf.nn.relu,
                                         name="polmconv2")  # -> (16, 16, 10)
        pool2_minimap = tf.layers.max_pooling2d(
            conv2_minimap, 2, 2, name="polmpool2")  # -> (8, 8, 10)
        flat_minimap = tf.reshape(pool2_minimap,
                                  [-1, 8 * 8 * 10])  # -> (8*8*10, )
        # dense_minimap = tf.layers.dense(inputs=flat_minimap, units=1024, activation=tf.nn.relu)
        # # dropout_mininmap = tf.layers.dropout(
        # #     inputs=dense_minimap, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)
        # minimap_output = tf.layers.dense(dense_minimap, 64)

        # Screen branch: same structure, 64x64x10 -> 8x8x20.
        conv1_screen = tf.layers.conv2d(
            inputs=tf.reshape(screen,
                              [-1, self.ssize, self.ssize, 10]),  # (64,64,10)
            filters=20,
            kernel_size=5,
            strides=1,
            padding='same',
            activation=tf.nn.leaky_relu,
            name="polsconv1")  # -> (64, 64, 20)
        pool1_screen = tf.layers.max_pooling2d(
            conv1_screen, pool_size=4, strides=4,
            name="polspool1")  # -> (16, 16, 20)
        conv2_screen = tf.layers.conv2d(pool1_screen,
                                        20,
                                        5,
                                        1,
                                        'same',
                                        activation=tf.nn.relu,
                                        name="polsconv2")  # -> (16, 16, 20)
        pool2_screen = tf.layers.max_pooling2d(
            conv2_screen, 2, 2, name="polspool2")  # -> (8, 8, 20)
        flat_screen = tf.reshape(pool2_screen,
                                 [-1, 8 * 8 * 20])  # -> (8*8*20, )
        # dense_screen = tf.layers.dense(inputs=flat_screen, units=1024, activation=tf.nn.relu)
        # # dropout_screen = tf.layers.dropout(
        # #     inputs=dense_screen, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)
        # screen_output = tf.layers.dense(dense_screen, 64, tf.nn.relu)

        # Small dense embeddings for the info vector and the action mask.
        info_fc = tf.layers.dense(inputs=layers.flatten(info),
                                  units=4,
                                  activation=tf.tanh,
                                  name="poldense1")

        aa_fc = tf.layers.dense(inputs=layers.flatten(available_action),
                                units=16,
                                activation=tf.tanh,
                                name="poldense2")

        # Fuse all branches into the policy-head input.
        last_out = tf.concat([flat_minimap, flat_screen, info_fc, aa_fc],
                             axis=1,
                             name="polconcat")
        # last_out = tf.layers.dense(inputs=last_out,units=600,name="poldense3")
        # last_out = tf.nn.tanh(U.dense(last_out, hid_size, "polfc1", weight_init=U.normc_initializer(1.0)))

        if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
            # Diagonal Gaussian head with state-independent learned log-std.
            mean = U.dense(last_out,
                           pdtype.param_shape()[0] // 2, "polfinal",
                           U.normc_initializer(0.01))
            logstd = tf.get_variable(name="logstd",
                                     shape=[1, pdtype.param_shape()[0] // 2],
                                     initializer=tf.zeros_initializer())
            pdparam = U.concatenate([mean, mean * 0.0 + logstd], axis=1)
        else:
            pdparam = U.dense(last_out,
                              pdtype.param_shape()[0], "polfinal",
                              U.normc_initializer(0.01))

        self.pd = pdtype.pdfromflat(pdparam)

        # No recurrent state.
        self.state_in = []
        self.state_out = []

        # change for BC
        #stochastic = tf.placeholder(dtype=tf.bool, shape=())
        stochastic = U.get_placeholder(name="stochastic",
                                       dtype=tf.bool,
                                       shape=())
        # Sample when stochastic, otherwise take the distribution mode.
        ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
        self.ac = ac
        self._act = U.function([stochastic, ob], [ac, self.vpred])
예제 #7
0
    def build_graph(self, obs_ph, acs_ph, last_acs_ph, reuse=False):
        """Build a conv discriminator over (observation, action, last action).

        The flat observation is sliced as 5 minimap channels (64x64),
        10 screen channels (64x64), an 11-dim info vector, and a 524-dim
        available-action mask -- assumed layout, TODO confirm.

        Args:
            obs_ph: flat observation placeholder, shape (batch, obs_dim).
            acs_ph: current-action placeholder.
            last_acs_ph: previous-action placeholder.
            reuse: when True, reuse variables already created in self.scope.

        Returns:
            A (batch, 1) tensor of unnormalized logits.
        """
        with tf.variable_scope(self.scope):
            if reuse:
                tf.get_variable_scope().reuse_variables()

            # Available-action mask sliced from the *raw* observation (before
            # normalization), so the 0/1 mask values are preserved.
            available_action = obs_ph[:, (
                5 * self.msize * self.msize + 10 * self.ssize * self.ssize +
                self.isize):(5 * self.msize * self.msize +
                             10 * self.ssize * self.ssize + self.isize +
                             self.available_action_size)]
            # obs_ph = obs_ph[:, :-524]

            with tf.variable_scope("obfilter"):
                self.obs_rms = RunningMeanStd(shape=self.observation_shape)
            obz = (obs_ph - self.obs_rms.mean) / self.obs_rms.std

            # Split the normalized observation into its components.
            minimap = obz[:, 0:5 * self.msize * self.msize]
            # minimap /= 2
            screen = obz[:, 5 * self.msize *
                         self.msize:5 * self.msize * self.msize +
                         10 * self.ssize * self.ssize]
            # screen /= 2
            info = obz[:, (5 * self.msize * self.msize +
                           10 * self.ssize * self.ssize):(
                               5 * self.msize * self.msize +
                               10 * self.ssize * self.ssize + self.isize)]
            # info /= 2

            # Minimap branch: two conv + max-pool stages, 64x64x5 -> 16x16x64.
            mconv1 = tf.layers.conv2d(inputs=tf.reshape(
                minimap, [-1, self.msize, self.msize, 5]),
                                      filters=32,
                                      kernel_size=[5, 5],
                                      padding="same",
                                      activation=tf.nn.leaky_relu)
            mpool1 = tf.layers.max_pooling2d(inputs=mconv1,
                                             pool_size=[2, 2],
                                             strides=2)
            mconv2 = tf.layers.conv2d(inputs=mpool1,
                                      filters=64,
                                      kernel_size=[5, 5],
                                      padding="same",
                                      activation=tf.nn.leaky_relu)
            mpool2 = tf.layers.max_pooling2d(inputs=mconv2,
                                             pool_size=[2, 2],
                                             strides=2)
            mpool2_flat = tf.reshape(mpool2, [-1, 16 * 16 * 64])

            # Screen branch: same structure, 64x64x10 -> 16x16x80.
            sconv1 = tf.layers.conv2d(inputs=tf.reshape(
                screen, [-1, self.ssize, self.ssize, 10]),
                                      filters=48,
                                      kernel_size=[5, 5],
                                      padding="same",
                                      activation=tf.nn.leaky_relu)
            spool1 = tf.layers.max_pooling2d(inputs=sconv1,
                                             pool_size=[2, 2],
                                             strides=2)
            sconv2 = tf.layers.conv2d(inputs=spool1,
                                      filters=80,
                                      kernel_size=[5, 5],
                                      padding="same",
                                      activation=tf.nn.leaky_relu)
            spool2 = tf.layers.max_pooling2d(inputs=sconv2,
                                             pool_size=[2, 2],
                                             strides=2)
            spool2_flat = tf.reshape(spool2, [-1, 16 * 16 * 80])

            # Small dense embeddings for the info vector and the action mask.
            info_fc = layers.fully_connected(layers.flatten(info),
                                             num_outputs=8,
                                             activation_fn=tf.tanh)

            aa_fc = layers.fully_connected(layers.flatten(available_action),
                                           num_outputs=32,
                                           activation_fn=tf.tanh)

            # _input = tf.concat([obs, acs_ph], axis=1) # concatenate the two input -> form a transition
            # Current action: expand to rank 3 then flatten before the dense
            # embedding (net effect is a dense layer over the raw action).
            acs_ph_temp = tf.identity(acs_ph)
            acs_ph_temp = tf.expand_dims(acs_ph_temp, 1)
            # HIDDEN_SIZE = 128
            # l1_action = tf.layers.dense(layers.flatten(acs_ph_temp), 256, tf.nn.relu)
            # input_to_rnn = tf.reshape(l1_action, [-1, 16, 16])
            # action_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=HIDDEN_SIZE, forget_bias=1.0, state_is_tuple=True)
            # inputs_rnn = tf.unstack(input_to_rnn, num=16, axis=1)
            # rnn_outputs,rnn_state= tf.contrib.rnn.static_rnn(action_lstm_cell,inputs_rnn,dtype=tf.float32)
            # l2_action = tf.layers.dense(rnn_state[-1], 128, tf.nn.tanh)          # hidden layer
            # acs_ph_lstm = tf.layers.dense(l2_action, 32, tf.nn.tanh)
            acs_ph_dense_output = layers.fully_connected(
                layers.flatten(acs_ph_temp),
                num_outputs=32,
                activation_fn=tf.tanh)

            # Previous action: same expand/flatten/dense treatment.
            last_acs_ph_temp = tf.identity(last_acs_ph)
            last_acs_ph_temp = tf.expand_dims(last_acs_ph_temp, 1)
            last_acs_ph_dense_output = layers.fully_connected(
                layers.flatten(last_acs_ph_temp),
                num_outputs=32,
                activation_fn=tf.tanh)

            # Fuse all branches and classify with a two-layer tanh MLP.
            _input = tf.concat([
                mpool2_flat, spool2_flat, info_fc, aa_fc, acs_ph_dense_output,
                last_acs_ph_dense_output
            ],
                               axis=1)
            p_h1 = tf.contrib.layers.fully_connected(_input,
                                                     self.hidden_size,
                                                     activation_fn=tf.nn.tanh)
            p_h2 = tf.contrib.layers.fully_connected(p_h1,
                                                     self.hidden_size,
                                                     activation_fn=tf.nn.tanh)
            logits = tf.contrib.layers.fully_connected(
                p_h2, 1, activation_fn=tf.identity)
        return logits