Example #1
File: C51.py Project: zxhSAMA/C51DQN
 def build_net(self,
               state,
               action,
               c_names,
               units_1,
               units_2,
               w_i,
               b_i,
               reg=None):
     with tf.variable_scope('conv1'):
         conv1 = conv(state, [5, 5, 3, 6], [6], [1, 2, 2, 1], w_i, b_i)
     with tf.variable_scope('conv2'):
         conv2 = conv(conv1, [3, 3, 6, 12], [12], [1, 2, 2, 1], w_i, b_i)
     with tf.variable_scope('flatten'):
         flatten = tf.contrib.layers.flatten(conv2)
         # Two alternative ways to write the reshape:
         # flatten = tf.reshape(relu5, [-1, np.prod(relu5.get_shape().as_list()[1:])])
         # flatten = tf.reshape(relu5, [-1, np.prod(relu5.shape.as_list()[1:])])
         # print flatten.get_shape()
     with tf.variable_scope('dense1'):
         dense1 = dense(flatten, units_1, [units_1], w_i, b_i)
     with tf.variable_scope('dense2'):
         dense2 = dense(dense1, units_2, [units_2], w_i, b_i)
     with tf.variable_scope('concat'):
         concatenated = tf.concat([dense2, tf.cast(action, tf.float32)], 1)
     with tf.variable_scope('dense3'):
         dense3 = dense(concatenated, self.atoms, [self.atoms], w_i, b_i)
     return dense3
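
Every listing on this page calls two small helpers, conv and dense, whose definitions are not shown. The snippet below is a minimal, hypothetical reconstruction that is merely consistent with how they are called (kernel/bias shapes, optional bias, optional activation); the ReLU defaults and the 'SAME' padding are assumptions, not taken from the original projects.

    import tensorflow as tf

    def conv(inputs, kernel_shape, bias_shape, strides, w_i, b_i, activation=tf.nn.relu):
        # kernel_shape = [height, width, in_channels, out_channels]
        weights = tf.get_variable('weights', shape=kernel_shape, initializer=w_i)
        biases = tf.get_variable('biases', shape=bias_shape, initializer=b_i)
        out = tf.nn.conv2d(inputs, weights, strides=strides, padding='SAME') + biases
        return activation(out) if activation is not None else out

    def dense(inputs, units, bias_shape, w_i, b_i=None, activation=tf.nn.relu):
        # inputs: a [batch, features] tensor; bias_shape=None means "no bias term"
        in_dim = inputs.get_shape().as_list()[-1]
        weights = tf.get_variable('weights', shape=[in_dim, units], initializer=w_i)
        out = tf.matmul(inputs, weights)
        if bias_shape is not None:
            biases = tf.get_variable(
                'biases', shape=bias_shape,
                initializer=b_i if b_i is not None else tf.zeros_initializer())
            out = out + biases
        return activation(out) if activation is not None else out
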
Example #2
File: a3C.py Project: usodonkey/A3C
 def get_mu_sigma(self):
     with tf.variable_scope('actor'):
         w_i = tf.random_uniform_initializer(0., 0.1)
         dense1 = dense(self.state_input,
                        200,
                        None,
                        w_i,
                        None,
                        activation=tf.nn.relu6)
         with tf.variable_scope('mu'):
             mu = dense(dense1,
                        self.action_dim,
                        None,
                        w_i,
                        None,
                        activation=tf.nn.tanh)
         with tf.variable_scope('sigma'):
             sigma = dense(dense1,
                           self.action_dim,
                           None,
                           w_i,
                           None,
                           activation=tf.nn.softplus)
         # return mu * self.config.ACTION_BOUND[1], sigma + 1e-4
         return mu, sigma + 1e-4
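
The (mu, sigma) pair returned here typically parameterizes a diagonal Gaussian policy in continuous-action A3C. A hedged usage sketch follows; the placeholder shape and the action bounds are assumptions, not values from the original project.

    import tensorflow as tf

    mu = tf.placeholder(tf.float32, [None, 3])      # stand-in for the actor's mu output
    sigma = tf.placeholder(tf.float32, [None, 3])   # stand-in for the actor's sigma output
    action_low, action_high = -1.0, 1.0             # hypothetical action bounds

    normal_dist = tf.distributions.Normal(mu, sigma)
    sampled_action = tf.clip_by_value(
        tf.squeeze(normal_dist.sample(1), axis=0), action_low, action_high)
    log_prob = normal_dist.log_prob(sampled_action)  # enters the policy-gradient loss
    entropy = normal_dist.entropy()                  # commonly added to encourage exploration
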
Example #3
File: a3C.py Project: usodonkey/A3C
 def v(self):
     with tf.variable_scope('critic'):
         w_i = tf.random_uniform_initializer(0., 0.1)
         b_i = tf.zeros_initializer()
         with tf.variable_scope('dense1'):
             dense1 = dense(self.state_input,
                            100, [100],
                            w_i,
                            activation=tf.nn.relu6)
         with tf.variable_scope('dense2'):
             dense2 = dense(dense1, 1, [1], w_i, b_i, activation=None)
         return dense2
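
The scalar value produced by this critic is normally regressed toward a bootstrapped return, and the resulting advantage is reused by the actor loss. A minimal sketch under that assumption (the placeholders are hypothetical):

    import tensorflow as tf

    v = tf.placeholder(tf.float32, [None, 1])         # stand-in for the v() output above
    target_v = tf.placeholder(tf.float32, [None, 1])  # discounted n-step return from rollouts
    advantage = target_v - v                          # also fed into the actor's loss
    critic_loss = tf.reduce_mean(tf.square(advantage))
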
Example #4
    def general_actor(self,
                      name,
                      num_inputs,
                      num_outputs,
                      ext=1,
                      reuse=False):  # ext = -1 for flexion net
        # Extension network: positive if state < state_input.
        # This is the first network that tries to implement some notion of:
        # - symmetry: the same network is used for the left and right leg; only the state input changes.
        # - phase-based network: two different network groups are used for swing and stance.
        # - general PD control.
        # Graph shared with Value Net
        with tf.variable_scope('actor/{}'.format(name)):
            state_dim = num_inputs

            cst_input = tf.constant(np.float32(np.zeros([1, 1])))
            #cst_input = tf.constant(np.float32(np.random.randn(1,state_dim)))
            state_input = self.state_input
            state_target = tf.contrib.layers.fully_connected(
                inputs=cst_input,
                num_outputs=state_dim,
                activation_fn=None,
                biases_initializer=tf.random_uniform_initializer(-1.0, 1.0),
                scope="stateTarget")

            state = tf.contrib.layers.flatten(state_input)
            actionNet = tf.nn.relu(ext * (state_target - state))

            #assert actionNet.shape == [1,state_dim]

            #w_i = tf.random_normal_initializer(0., 0.1)
            w_i = tf.random_uniform_initializer(0., 0.1)
            with tf.variable_scope('mu'):
                mu = dense(actionNet,
                           num_outputs,
                           None,
                           w_i,
                           None,
                           activation=tf.abs)
                #variable_summaries_history(mu,self.is_local_net)
            with tf.variable_scope('sigma'):
                sigma = dense(actionNet,
                              num_outputs,
                              None,
                              w_i,
                              None,
                              activation=tf.nn.sigmoid)
                #variable_summaries_history(sigma,self.is_local_net)

            return mu, sigma / 10.0
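
The expression relu(ext * (state_target - state)) above is a one-sided, PD-style error signal: with ext = +1 a unit reacts only when the state is below its learned target, with ext = -1 only when it is above. A small NumPy illustration with arbitrary numbers:

    import numpy as np

    relu = lambda x: np.maximum(x, 0.0)
    state_target = 0.5
    state = np.array([0.2, 0.5, 0.8])
    print(relu(+1 * (state_target - state)))   # [0.3 0.  0. ]  -> extensor-style drive
    print(relu(-1 * (state_target - state)))   # [0.  0.  0.3]  -> flexor-style drive
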
Example #5
 def value_get_current(self):
     with tf.variable_scope('critic'):
         w_i = tf.random_uniform_initializer(0., 0.1)
         b_i = tf.zeros_initializer()  # bias initializer used by the output layers below
         net_type = self.config.CRITIC_NETWORK_TYPE
         if net_type == 1:
             with tf.variable_scope('dense1'):
                 dense1 = dense(self.state_input,
                                256, [256],
                                w_i,
                                activation=tf.nn.relu)
                 if self.name == "W_0":
                     variable_summaries_layer(dense1, self.is_local_net)
             with tf.variable_scope('dense2'):
                 dense2 = dense(dense1, 1, [1], w_i, activation=None)
                 if self.name == "W_0":
                     variable_summaries_layer(dense2, self.is_local_net)
             return dense2
         elif net_type == 2:
             with tf.variable_scope('dense1'):
                 dense1 = dense(self.state_input,
                                512, [512],
                                w_i,
                                activation=tf.nn.relu)
                 variable_summaries_layer(dense1, self.is_local_net)
             with tf.variable_scope('dense2'):
                 dense2 = dense(dense1,
                                256, [256],
                                w_i,
                                activation=tf.nn.relu)
                 variable_summaries_layer(dense2, self.is_local_net)
             with tf.variable_scope('dense3'):
                 dense3 = dense(dense2, 1, [1], w_i, b_i, activation=None)
                 variable_summaries_layer(dense3, self.is_local_net)
             return dense3
         else:
             with tf.variable_scope('dense1'):
                 dense1 = dense(self.state_input,
                                128, [128],
                                w_i,
                                activation=tf.nn.selu)
                 variable_summaries_layer(dense1, self.is_local_net)
             with tf.variable_scope('dense2'):
                 dense2 = dense(self.state_input,
                                128, [128],
                                w_i,
                                activation=tf.nn.selu)
                 variable_summaries_layer(dense2, self.is_local_net)
             with tf.variable_scope('dense3'):
                 dense3 = dense(tf.concat([dense1, dense2], axis=1),
                                1, [1],
                                w_i,
                                b_i,
                                activation=None)
                 variable_summaries_layer(dense3, self.is_local_net)
             return dense3
Example #6
 def a_prob(self):
     with tf.variable_scope('actor'):
         w_i = tf.random_uniform_initializer(0., 0.1)
         b_i = tf.zeros_initializer()
         with tf.variable_scope('dense1'):
             dense1 = dense(self.state_input,
                            200,
                            None,
                            w_i,
                            b_i,
                            activation=tf.nn.relu6)
         with tf.variable_scope('dense2'):
             dense2 = dense(dense1,
                            self.action_dim,
                            None,
                            w_i,
                            b_i,
                            activation=tf.nn.softmax)
         return dense2
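
The softmax output of this actor defines a categorical action distribution. A hedged sketch of how it is typically sampled and turned into a log-probability for the policy-gradient loss (the placeholder shape is an assumption):

    import tensorflow as tf

    a_prob = tf.placeholder(tf.float32, [None, 4])   # stand-in for the a_prob() output
    # Sample one action index per batch row from the categorical distribution.
    action = tf.squeeze(tf.multinomial(tf.log(a_prob + 1e-10), num_samples=1), axis=1)
    # Log-probability of the chosen action.
    chosen_prob = tf.reduce_sum(a_prob * tf.one_hot(action, depth=4), axis=1)
    log_prob = tf.log(chosen_prob + 1e-10)
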
Example #7
    def get_mu_sigma(self, reuse=False):
        # Graph shared with Value Net
        with tf.variable_scope('actor'):
            w_i = tf.random_normal_initializer(0., 1.0)
            b_i = tf.zeros_initializer()
            if self.config.ACTOR_NETWORK_TYPE == 1:
                dense1 = dense(self.state_input,
                               256,
                               None,
                               w_i,
                               b_i,
                               activation=None)
                # dense1 = tf.contrib.layers.fully_connected(
                #     inputs=tf.contrib.layers.flatten(self.state_input),
                #     num_outputs=256,
                #     scope="dense1")
                variable_summaries_layer(dense1, self.is_local_net)
                if not reuse:
                    tf.contrib.layers.summarize_activation(dense1)
                with tf.variable_scope('mu'):
                    mu = dense(dense1,
                               self.action_dim,
                               None,
                               w_i,
                               None,
                               activation=tf.sin)
                    variable_summaries_history(mu, self.is_local_net)
                with tf.variable_scope('sigma'):
                    sigma = dense(dense1,
                                  self.action_dim,
                                  None,
                                  w_i,
                                  None,
                                  activation=tf.nn.sigmoid)
                    variable_summaries_history(sigma, self.is_local_net)
            elif self.config.ACTOR_NETWORK_TYPE == 2:
                conv1 = tf.contrib.layers.conv2d(self.state_input,
                                                 16,
                                                 8,
                                                 self.config.TEMPORAL_WINDOW,
                                                 activation_fn=lambda x:
                                                 (tf.nn.relu(x)),
                                                 scope="conv1")
                conv2 = tf.contrib.layers.conv2d(conv1,
                                                 32,
                                                 self.config.TEMPORAL_WINDOW,
                                                 2,
                                                 activation_fn=lambda x:
                                                 (tf.nn.relu(x)),
                                                 scope="conv2")
                # Fully connected layer
                dense2 = tf.contrib.layers.fully_connected(
                    inputs=tf.contrib.layers.flatten(conv2),
                    num_outputs=256,
                    scope="dense2")
                dense1 = tf.nn.dropout(dense2, 0.9)
                #dense1 = dense(fc1, 200, None, w_i, None, activation=None)
                if not reuse:
                    tf.contrib.layers.summarize_activation(conv1)
                    tf.contrib.layers.summarize_activation(conv2)
                    tf.contrib.layers.summarize_activation(dense1)
                with tf.variable_scope('mu'):
                    mu = dense(dense1,
                               self.action_dim,
                               None,
                               w_i,
                               None,
                               activation=tf.nn.relu)
                    variable_summaries_history(mu, self.is_local_net)
                with tf.variable_scope('sigma'):
                    sigma = dense(dense1,
                                  self.action_dim,
                                  None,
                                  w_i,
                                  None,
                                  activation=tf.nn.sigmoid)
                    variable_summaries_history(sigma, self.is_local_net)
            elif self.config.ACTOR_NETWORK_TYPE == 4:
                self.dense1 = dense(self.state_input,
                                    200,
                                    None,
                                    w_i,
                                    None,
                                    activation=None)
                variable_summaries_layer(self.dense1, self.is_local_net)
                action_dim_half = int(self.action_dim / 2)
                #TODO CHANGE REPLAY BUFFER TO OUR NEW IMPLEMENTATION WITH TERMINAL BASED RECALL
                if not reuse:
                    tf.contrib.layers.summarize_activation(self.dense1)
                with tf.variable_scope('muStance'):
                    self.muStance = dense(self.dense1,
                                          action_dim_half,
                                          None,
                                          w_i,
                                          None,
                                          activation=tf.nn.selu)
                with tf.variable_scope('sigmaStance'):
                    self.sigmaStance = dense(self.dense1,
                                             action_dim_half,
                                             None,
                                             w_i,
                                             None,
                                             activation=tf.nn.sigmoid)
                with tf.variable_scope('muSwing'):
                    self.muSwing = dense(self.dense1,
                                         action_dim_half,
                                         None,
                                         w_i,
                                         None,
                                         activation=tf.nn.selu)
                with tf.variable_scope('sigmaSwing'):
                    self.sigmaSwing = dense(self.dense1,
                                            action_dim_half,
                                            None,
                                            w_i,
                                            None,
                                            activation=tf.nn.sigmoid)
                with tf.variable_scope('mu'):
                    mu = tf.concat([self.muStance, self.muSwing], axis=1)
                    variable_summaries_history(mu, self.is_local_net)
                with tf.variable_scope('sigma'):
                    sigma = tf.concat([self.sigmaStance, self.sigmaSwing],
                                      axis=1)
                    variable_summaries_history(sigma, self.is_local_net)
            elif self.config.ACTOR_NETWORK_TYPE == 5:
                # This is the first network that will try to implement some notion of :
                # - symmetry. same network used for left and right leg. Only state input changes.
                # - phase based network. two different network groups used for swing and stance.
                # - general pd control.

                isExtensor = np.array(
                    #             HF GLU HAB HAD VAS HAM GAS SOL TA
                    [0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0])

                def concat_with_rule(arr1, arr2, rule):
                    i2 = 0
                    i1 = 0
                    arr = []
                    for r in rule:
                        if r:
                            arr.append(arr1[0, i1])
                            i1 += 1
                        else:
                            arr.append(arr2[0, i2])
                            i2 += 1
                    return tf.expand_dims(tf.stack(arr, axis=-1), 0)

                selector = 1
                multiplier = 1
                mu_ext_st, sigma_ext_st = self.general_actor(
                    "extensorSt",
                    self.state_dim,
                    np.sum(isExtensor == selector),
                    ext=multiplier,
                    reuse=True)
                multiplier = 0.3
                mu_ext_sw, sigma_ext_sw = self.general_actor(
                    "extensorSw",
                    self.state_dim,
                    np.sum(isExtensor == selector),
                    ext=multiplier,
                    reuse=True)
                selector = 0
                multiplier = -0.3
                mu_flex_st, sigma_flex_st = self.general_actor(
                    "flexorSt",
                    self.state_dim,
                    np.sum(isExtensor == selector),
                    ext=multiplier,
                    reuse=True)
                multiplier = -1
                mu_flex_sw, sigma_flex_sw = self.general_actor(
                    "flexorSw",
                    self.state_dim,
                    np.sum(isExtensor == selector),
                    ext=multiplier,
                    reuse=True)

                mu_st = concat_with_rule(mu_ext_st, mu_flex_st, isExtensor)
                sigma_st = concat_with_rule(sigma_ext_st, sigma_flex_st,
                                            isExtensor)

                mu_sw = concat_with_rule(mu_ext_sw, mu_flex_sw, isExtensor)
                sigma_sw = concat_with_rule(sigma_ext_sw, sigma_flex_sw,
                                            isExtensor)

                mu = tf.concat([mu_st, mu_sw], axis=1)
                sigma = tf.concat([sigma_st, sigma_sw], axis=1)

                return mu, sigma
            else:
                raise ValueError(
                    'Network type "{}" not implemented, should be integer'.
                    format(self.config.ACTOR_NETWORK_TYPE))

            # return mu * self.config.ACTION_BOUND[1], sigma + 1e-4
            return mu, sigma + 1e-4
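
In the type-5 branch above, concat_with_rule interleaves the extensor and flexor sub-network outputs back into the original muscle ordering given by the isExtensor mask. A plain-Python illustration of that interleaving, with arbitrary values:

    rule = [0, 1, 1, 0]                  # 1 -> next extensor value, 0 -> next flexor value
    extensor_vals = iter([10.0, 11.0])   # one value per 1 in rule
    flexor_vals = iter([20.0, 21.0])     # one value per 0 in rule
    merged = [next(extensor_vals) if r else next(flexor_vals) for r in rule]
    print(merged)                        # [20.0, 10.0, 11.0, 21.0]
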
Example #8
    def action_get_current(self, reuse=False):
        # Graph shared with Value Net
        with tf.variable_scope('actor'):
            w_i = tf.initializers.glorot_normal()
            b_i = tf.zeros_initializer()

            if self.config.ACTOR_NETWORK_TYPE == 1:
                with tf.variable_scope('act_dense1'):
                    dense1 = dense(self.state_input,
                                   512, [512],
                                   w_i,
                                   activation=tf.nn.relu)
                    if not reuse and self.name == "W_0":
                        variable_summaries_layer(dense1, self.is_local_net)
                with tf.variable_scope('act_dense2'):
                    dense2 = dense(dense1,
                                   256, [256],
                                   w_i,
                                   activation=tf.nn.relu)
                    if not reuse and self.name == "W_0":
                        variable_summaries_layer(dense2, self.is_local_net)

                if not reuse and self.name == "W_0":
                    tf.contrib.layers.summarize_activation(dense1)
                    tf.contrib.layers.summarize_activation(dense2)
                with tf.variable_scope('mu'):
                    mu = dense(dense2,
                               self.action_dim,
                               None,
                               w_i,
                               activation=None)
                    if not reuse and self.name == "W_0":
                        variable_summaries_history(mu, self.is_local_net)

            elif self.config.ACTOR_NETWORK_TYPE == 11:
                with tf.variable_scope('act_dense1'):
                    dense1 = dense(self.state_input,
                                   512, [512],
                                   w_i,
                                   activation=tf.nn.relu)
                    if not reuse and self.name == "W_0":
                        variable_summaries_layer(dense1, self.is_local_net)
                with tf.variable_scope('act_dense2'):
                    dense2 = dense(dense1,
                                   256, [256],
                                   w_i,
                                   activation=tf.nn.relu)
                    if not reuse and self.name == "W_0":
                        variable_summaries_layer(dense2, self.is_local_net)
                with tf.variable_scope('act_dense3'):
                    dense3 = dense(dense2,
                                   256, [256],
                                   w_i,
                                   b_i,
                                   activation=tf.nn.relu)
                    if not reuse and self.name == "W_0":
                        variable_summaries_layer(dense3, self.is_local_net)

                if not reuse and self.name == "W_0":
                    tf.contrib.layers.summarize_activation(dense1)
                    tf.contrib.layers.summarize_activation(dense2)
                    tf.contrib.layers.summarize_activation(dense3)
                with tf.variable_scope('mu'):
                    mu = dense(dense3,
                               self.action_dim,
                               None,
                               w_i,
                               activation=None)
                    if not reuse and self.name == "W_0":
                        variable_summaries_history(mu, self.is_local_net)

            else:
                raise ValueError(
                    'Network type "{}" not implemented, should be integer'.
                    format(self.config.ACTOR_NETWORK_TYPE))
            return mu