Example #1
    def build(self, img, q, scope='Classifier'):
        # Normalize input images to the range [0, 1]
        img = img / 255.

        def _positional_encoding(features):
            # Append two positional-encoding channels (x, y) to the given feature maps
            d = features.get_shape().as_list()[1]
            indices = tf.range(d)
            x = tf.tile(tf.reshape(indices, [d, 1]), [1, d])
            y = tf.tile(tf.reshape(indices, [1, d]), [d, 1])
            pos = tf.cast(tf.stack([x, y], axis=2)[None] / d, tf.float32)
            pos = tf.tile(pos, [tf.shape(img)[0], 1, 1, 1])
            return tf.concat([features, pos], axis=3)

        def f_phi(g, scope='f_phi'):
            with tf.variable_scope(scope):
                fc_1 = fc(g, 256, activation_fn=tf.nn.relu, name='fc_1')
                fc_1 = slim.dropout(fc_1, keep_prob=0.5, is_training=self.is_training, scope='fc_3/')
                logits = fc(fc_1, self.a_dim, activation_fn=None, name='fc_3')
                return logits

        with tf.variable_scope(scope):
            conv_1 = conv2d(img, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_1')
            conv_2 = conv2d(conv_1, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_2')
            conv_3 = conv2d(conv_2, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_3')

            conv_pos = _positional_encoding(conv_3)

            ###################### MODIFY HERE ######################

            def g_theta(o_i, o_j, q, scope='g_theta', reuse=True):
                with tf.variable_scope(scope, reuse=reuse):
                    g_1 = fc(tf.concat([o_i, o_j, q], axis=1), 256, name='g_1')
                    g_2 = fc(g_1, 256, name='g_2')
                    return g_2

            d = conv_pos.get_shape().as_list()[1]

            all_g = []
            for i in range(d * d):
                o_i = conv_pos[:, i // d, i % d, :]
                for j in range(d * d):
                    o_j = conv_pos[:, j // d, j % d, :]
                    # Create the g_theta variables on the first pair; reuse them afterwards.
                    g_i_j = g_theta(o_i, o_j, q, reuse=(i > 0 or j > 0))
                    all_g.append(g_i_j)

            all_g = tf.stack(all_g, axis=0)
            all_g = tf.reduce_sum(all_g, axis=0)

            #########################################################

            logits = f_phi(all_g)

        return logits
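A side note on Example #1: the double loop enumerates all d²·d² ordered object pairs in Python, so graph construction is O(d⁴); Example #2 below builds the same pairwise sum with tiling instead. A minimal NumPy sketch (illustrative only, with g taken as the identity) of why the two formulations agree:

    import numpy as np

    d, c = 4, 26                          # grid side, per-object feature size
    objects = np.random.rand(d * d, c)

    # Loop form: concatenate every ordered pair (o_i, o_j), then sum.
    loop_sum = sum(np.concatenate([o_i, o_j])
                   for o_i in objects for o_j in objects)

    # Broadcast form: tile into a (d*d, d*d, 2c) pair grid, then reduce.
    o_1 = np.tile(objects[:, None, :], (1, d * d, 1))
    o_2 = np.tile(objects[None, :, :], (d * d, 1, 1))
    pair_sum = np.concatenate([o_1, o_2], axis=-1).sum(axis=(0, 1))

    assert np.allclose(loop_sum, pair_sum)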
Example #2
    def build(self, img, q, scope='Classifier'):
        # Normalize input images to the range [0, 1]
        img = img / 255.

        def _positional_encoding(features):
            # Append two positional-encoding channels (x, y) to the given feature maps
            d = features.get_shape().as_list()[1]
            indices = tf.range(d)
            x = tf.tile(tf.reshape(indices, [d, 1]), [1, d])
            y = tf.tile(tf.reshape(indices, [1, d]), [d, 1])
            pos = tf.cast(tf.stack([x, y], axis=2)[None] / d, tf.float32)
            pos = tf.tile(pos, [tf.shape(img)[0], 1, 1, 1])
            return tf.concat([features, pos], axis=3)

        def f_phi(g, scope='f_phi'):
            with tf.variable_scope(scope):
                fc_1 = fc(g, 256, activation_fn=tf.nn.relu, name='fc_1')
                fc_1 = slim.dropout(fc_1, keep_prob=0.5, is_training=self.is_training, scope='fc_3/')
                logits = fc(fc_1, self.a_dim, activation_fn=None, name='fc_3')
                return logits

        with tf.variable_scope(scope):
            conv_1 = conv2d(img, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_1')
            conv_2 = conv2d(conv_1, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_2')
            conv_3 = conv2d(conv_2, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_3')

            conv_pos = _positional_encoding(conv_3)

            ###################### MODIFY HERE ######################

            def g_theta(o_pair, q, d, scope='g_theta'):
                with tf.variable_scope(scope):
                    q = q[:, None, None, :]
                    q = tf.tile(q, [1, d * d, d * d, 1])
                    o = tf.concat([o_pair, q], axis=3)
                    g_1 = conv2d(o, 256, self.is_training,
                                 k_h=1, k_w=1, s_h=1, s_w=1,
                                 activation_fn=tf.nn.relu, name='g_1')
                    g_2 = conv2d(g_1, 256, self.is_training,
                                 k_h=1, k_w=1, s_h=1, s_w=1,
                                 activation_fn=tf.nn.relu, name='g_2')
                    return g_2

            d = conv_3.get_shape().as_list()[1]
            c = conv_pos.get_shape().as_list()[3]  # 24 conv channels + 2 positional
            # Tile along the pair axes only (not the channel axis) so that
            # o_pair[:, i, j, :] is the concatenation of objects i and j.
            o_1 = tf.tile(tf.reshape(conv_pos, [-1, d * d, 1, c]), [1, 1, d * d, 1])
            o_2 = tf.tile(tf.reshape(conv_pos, [-1, 1, d * d, c]), [1, d * d, 1, 1])
            o_pair = tf.concat([o_1, o_2], axis=3)
            g = g_theta(o_pair, q, d)

            all_g = tf.reduce_sum(g, [1, 2])

            #########################################################

            logits = f_phi(all_g)

        return logits
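The 1x1 convolutions inside g_theta act as one fully connected layer shared across every cell of the (d², d²) pair grid, which is what lets a single graph op evaluate g over all object pairs at once. A minimal sketch of that equivalence in plain NumPy (an assumption for illustration, not the repo's conv2d/fc wrappers):

    import numpy as np

    b, h, w, c_in, c_out = 2, 3, 3, 5, 7
    x = np.random.rand(b, h, w, c_in)
    W = np.random.rand(c_in, c_out)

    # A 1x1 convolution is a per-cell matrix multiply with shared weights ...
    conv_1x1 = np.einsum('bhwc,cd->bhwd', x, W)
    # ... i.e. the same dense layer applied at every (h, w) position.
    dense_everywhere = (x.reshape(-1, c_in) @ W).reshape(b, h, w, c_out)
    assert np.allclose(conv_1x1, dense_everywhere)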
Example #3
 def g_theta(o_pair, q, d, scope='g_theta'):
     with tf.variable_scope(scope):
         q = q[:, None, None, :]
         q = tf.tile(q, [1, d * d, d * d, 1])
         o = tf.concat([o_pair, q], axis=3)
         g_1 = conv2d(o, 256, self.is_training,
                      k_h=1, k_w=1, s_h=1, s_w=1,
                      activation_fn=tf.nn.relu, name='g_1')
         g_2 = conv2d(g_1, 256, self.is_training,
                      k_h=1, k_w=1, s_h=1, s_w=1,
                      activation_fn=tf.nn.relu, name='g_2')
         return g_2
Example #4
 def State_Encoder(s, batch_size, scope='State_Encoder', reuse=False):
     with tf.variable_scope(scope, reuse=reuse) as scope:
         if not reuse: log.warning(scope.name)
         _ = conv2d(s, 16, is_train, k_h=3, k_w=3,
                    info=not reuse, batch_norm=True, name='conv1')
         _ = conv2d(_, 32, is_train, k_h=3, k_w=3,
                    info=not reuse, batch_norm=True, name='conv2')
         _ = conv2d(_, 48, is_train, k_h=3, k_w=3,
                    info=not reuse, batch_norm=True, name='conv3')
         if self.dataset_type == 'vizdoom':
             _ = conv2d(_, 48, is_train, k_h=3, k_w=3,
                        info=not reuse, batch_norm=True, name='conv4')
             _ = conv2d(_, 48, is_train, k_h=3, k_w=3,
                        info=not reuse, batch_norm=True, name='conv5')
         state_feature = tf.reshape(_, [batch_size, -1])
         return state_feature
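One caveat about the flatten step above: tf.reshape(_, [batch_size, -1]) needs batch_size to be known at graph-construction time. When it is not, the batch dimension can instead be read from the tensor itself, a pattern several of the other examples here already use (a minimal sketch, assuming a generic TF 1.x placeholder rather than this repo's helpers):

    import tensorflow as tf

    x = tf.placeholder(tf.float32, [None, 8, 8, 48])   # unknown batch size
    flat = tf.reshape(x, [tf.shape(x)[0], -1])         # batch resolved at run time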
Example #5
    def build(self, img, q, scope='Classifier'):
        # Normalize input images to the range [0, 1]
        img = img / 255.

        with tf.variable_scope(scope):
            conv_1 = conv2d(img, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_1')
            conv_2 = conv2d(conv_1, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_2')
            conv_3 = conv2d(conv_2, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_3')

            # Concatenate the question onto the flattened image features
            conv_q = tf.concat([tf.reshape(conv_3, [tf.shape(conv_3)[0], -1]), q], axis=1)
            
            fc_1 = fc(conv_q, 256, activation_fn=tf.nn.relu, name='fc_1')
            fc_2 = fc(fc_1, 256, activation_fn=tf.nn.relu, name='fc_2')
            fc_2 = slim.dropout(fc_2, keep_prob=0.5, is_training=self.is_training, scope='fc_3/')
            logits = fc(fc_2, self.a_dim, activation_fn=None, name='fc_3')

        return logits
Example #6
    def build(self, img, q, scope='Classifier'):
        # Normalize input images to the range [0, 1]
        img = img / 255.

        def _positional_encoding(features):
            # Append two positional-encoding channels (x, y) to the given feature maps
            d = features.get_shape().as_list()[1]
            indices = tf.range(d)
            x = tf.tile(tf.reshape(indices, [d, 1]), [1, d])
            y = tf.tile(tf.reshape(indices, [1, d]), [d, 1])
            pos = tf.cast(tf.stack([x, y], axis=2)[None] / d, tf.float32)
            pos = tf.tile(pos, [tf.shape(img)[0], 1, 1, 1])
            return tf.concat([features, pos], axis=3)

        def f_phi(g, scope='f_phi'):
            with tf.variable_scope(scope):
                fc_1 = fc(g, 256, activation_fn=tf.nn.relu, name='fc_1')
                fc_1 = slim.dropout(fc_1, keep_prob=0.5, is_training=self.is_training, scope='fc_3/')
                logits = fc(fc_1, self.a_dim, activation_fn=None, name='fc_3')
                return logits

        with tf.variable_scope(scope):
            conv_1 = conv2d(img, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_1')
            conv_2 = conv2d(conv_1, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_2')
            conv_3 = conv2d(conv_2, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_3') # (b,d,d,c)

            # Add positional information (x, y) to the features: shape (b, d, d, c+2)
            conv_pos = _positional_encoding(conv_3) 

            ###################### MODIFY HERE ###################### 
            
            conv_q = tf.concat([tf.reshape(conv_pos, [tf.shape(conv_pos)[0], -1]), q], axis=1) 
            
            fc_1 = fc(conv_q, 256, activation_fn=tf.nn.relu, name='fc_1') 
            all_g = fc_1

            #########################################################

            logits = f_phi(all_g)

        return logits
Example #7
    def build(self, img, q, scope='Classifier'):
        # Normalize input images to the range [0, 1]
        img = img / 255.

        with tf.variable_scope(scope):

            ###################### MODIFY HERE ######################
            ## Compute film(q) for gamma, beta

            conv_1 = conv2d(img,
                            24,
                            self.is_training,
                            activation_fn=tf.nn.relu,
                            name='conv_1')
            ## Affine transform of conv_1
            conv_2 = conv2d(conv_1,
                            24,
                            self.is_training,
                            activation_fn=tf.nn.relu,
                            name='conv_2')
            ## Affine transform of conv_2
            conv_3 = conv2d(conv_2,
                            24,
                            self.is_training,
                            activation_fn=tf.nn.relu,
                            name='conv_3')
            ## Affine transform of conv_3

            #########################################################

            features = tf.reshape(conv_3, [tf.shape(conv_3)[0], -1])
            fc_1 = fc(features, 256, activation_fn=tf.nn.relu, name='fc_1')
            fc_2 = fc(fc_1, 256, activation_fn=tf.nn.relu, name='fc_2')
            fc_2 = slim.dropout(fc_2,
                                keep_prob=0.5,
                                is_training=self.is_training,
                                scope='fc_3/')
            logits = fc(fc_2, self.a_dim, activation_fn=None, name='fc_3')

        return logits
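The ## markers above leave the conditioning unimplemented; Example #8 fills them in. For reference, feature-wise linear modulation (FiLM) applies a question-dependent affine transform to each channel c of a feature map x, with gamma and beta predicted from the question:

    \mathrm{FiLM}(x_c \mid q) = \gamma_c(q)\, x_c + \beta_c(q)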
Example #8
    def build(self, img, q, scope='Classifier'):
        # Normalize input images to the range [0, 1]
        img = img / 255.

        with tf.variable_scope(scope):

            ###################### MODIFY HERE ######################

            def film(q, scope='film'):
                with tf.variable_scope(scope):
                    cond = fc(q, 3 * 2 * 24, activation_fn=None, name='cond')
                    cond = tf.reshape(cond, [-1, 3, 2, 24])
                    return cond

            def modulate(conv, gamma, beta):
                gamma = tf.reshape(gamma, [-1, 1, 1, 24])
                beta = tf.reshape(beta, [-1, 1, 1, 24])
                return (1 + gamma) * conv + beta

            q_embed = fc(q, 256, name='fc_q')
            cond = film(q_embed)

            conv_1 = conv2d(img, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_1')
            conv_1 = modulate(conv_1, cond[:, 0, 0, :], cond[:, 0, 1, :])
            conv_2 = conv2d(conv_1, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_2')
            conv_2 = modulate(conv_2, cond[:, 1, 0, :], cond[:, 1, 1, :])
            conv_3 = conv2d(conv_2, 24, self.is_training, activation_fn=tf.nn.relu, name='conv_3')
            conv_3 = modulate(conv_3, cond[:, 2, 0, :], cond[:, 2, 1, :])

            #########################################################

            features = tf.reshape(conv_3, [tf.shape(conv_3)[0], -1])
            fc_1 = fc(features, 256, activation_fn=tf.nn.relu, name='fc_1')
            fc_2 = fc(fc_1, 256, activation_fn=tf.nn.relu, name='fc_2')
            fc_2 = slim.dropout(fc_2, keep_prob=0.5, is_training=self.is_training, scope='fc_3/')
            logits = fc(fc_2, self.a_dim, activation_fn=None, name='fc_3')

        return logits
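Note the (1 + gamma) * conv + beta parameterization in modulate: when the conditioning outputs are zero, each block reduces to the identity, so modulation starts out neutral rather than zeroing features (the reparameterization the FiLM paper recommends for stable training). A quick NumPy check of the broadcasting and the identity-at-zero property (an illustrative assumption, mirroring modulate() above):

    import numpy as np

    b, h, w, c = 2, 4, 4, 24
    conv = np.random.rand(b, h, w, c)
    gamma = np.zeros((b, c))          # zero gamma/beta => identity, thanks to (1 + gamma)
    beta = np.zeros((b, c))

    # Per-channel gamma/beta broadcast over all (h, w) positions.
    out = (1 + gamma.reshape(b, 1, 1, c)) * conv + beta.reshape(b, 1, 1, c)
    assert np.allclose(out, conv)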
Example #9
 def State_Encoder(s, per, batch_size, scope='State_Encoder', reuse=False):
     with tf.variable_scope(scope, reuse=reuse) as scope:
         if not reuse: log.warning(scope.name)
         _ = conv2d(s, 16, is_train, k_h=3, k_w=3,
                    info=not reuse, batch_norm=True, name='conv1')
         _ = conv2d(_, 32, is_train, k_h=3, k_w=3,
                    info=not reuse, batch_norm=True, name='conv2')
         _ = conv2d(_, 48, is_train, k_h=3, k_w=3,
                    info=not reuse, batch_norm=True, name='conv3')
         if self.pixel_input:
             _ = conv2d(_, 48, is_train, k_h=3, k_w=3,
                        info=not reuse, batch_norm=True, name='conv4')
             _ = conv2d(_, 48, is_train, k_h=3, k_w=3,
                        info=not reuse, batch_norm=True, name='conv5')
         state_feature = tf.reshape(_, [batch_size, -1])
         if self.state_encoder_fc:
             state_feature = fc(state_feature, 512, is_train,
                                info=not reuse, name='fc1')
             state_feature = fc(state_feature, 512, is_train,
                                info=not reuse, name='fc2')
         state_feature = tf.concat([state_feature, per], axis=-1)
         if not reuse: log.info(
             'concat feature {}'.format(state_feature))
         return state_feature
Example #10
    def construct(self, config):
        self.w = {}
        self.t_w = {}
        activation_fn = tf.nn.relu
        initializer = tf.truncated_normal_initializer(0, 0.02)
        # All heads share the same state representation.

        with tf.variable_scope("target_ori_q"):
            if self.cnn_format == 'NHWC':
                self.residual_state_input_n = tf.placeholder(
                    "float32", [
                        None, self.screen_height, self.screen_width,
                        self.history_length
                    ],
                    name="residual_state_input")
            else:
                self.residual_state_input_n = tf.placeholder(
                    "float32", [
                        None, self.history_length, self.screen_height,
                        self.screen_width
                    ],
                    name="residual_state_input")
            if self.cnn_format == 'NHWC':
                self.state_input_n = tf.placeholder("float32", [
                    None, self.screen_height, self.screen_width,
                    self.history_length
                ],
                                                    name="state_input")
            else:
                self.state_input_n = tf.placeholder("float32", [
                    None, self.history_length, self.screen_height,
                    self.screen_width
                ],
                                                    name="state_input")
            self.l1_n, self.t_w['l1_s_w'], self.t_w['l1_s_b'] = conv2d(
                tf.concat(3,
                          [self.state_input_n, self.residual_state_input_n]),
                32, [8, 8], [4, 4],
                initializer,
                activation_fn,
                self.cnn_format,
                name='l1_s')
            self.l2_n, self.t_w['l2_s_w'], self.t_w['l2_s_b'] = conv2d(
                self.l1_n,
                64, [4, 4], [2, 2],
                initializer,
                activation_fn,
                self.cnn_format,
                name="l2_s")
            self.l3_n, self.t_w['l3_s_w'], self.t_w['l3_s_b'] = conv2d(
                self.l2_n,
                64, [3, 3], [1, 1],
                initializer,
                activation_fn,
                self.cnn_format,
                name="l3_s")
            shape = self.l3_n.get_shape().as_list()
            self.l3_n_flat = tf.reshape(
                self.l3_n, [-1, reduce(lambda x, y: x * y, shape[1:])])
            self.l1_n_q, self.t_w['l1_q_w'], self.t_w['l1_q_b'] = linear(
                self.l3_n_flat, 512, activation_fn=activation_fn, name="l1_q")
            self.ori_q_n, self.t_w['l2_q_w'], self.t_w['l2_q_b'] = linear(
                self.l1_n_q,
                self.config.option_num + self.config.action_num,
                name='ori_q')

        with tf.variable_scope("ori_q"):
            if self.cnn_format == 'NHWC':
                self.residual_state_input_s = tf.placeholder(
                    "float32", [
                        None, self.screen_height, self.screen_width,
                        self.history_length
                    ],
                    name="residual_state_input")
            else:
                self.residual_state_input_s = tf.placeholder(
                    "float32", [
                        None, self.history_length, self.screen_height,
                        self.screen_width
                    ],
                    name="residual_state_input")
            if self.cnn_format == 'NHWC':
                self.state_input = tf.placeholder("float32", [
                    None, self.screen_height, self.screen_width,
                    self.history_length
                ],
                                                  name="state_input")
            else:
                self.state_input = tf.placeholder("float32", [
                    None, self.history_length, self.screen_height,
                    self.screen_width
                ],
                                                  name="state_input")
            self.l1_s, self.w['l1_s_w'], self.w['l1_s_b'] = conv2d(
                tf.concat(3, [self.state_input, self.residual_state_input_s]),
                32, [8, 8], [4, 4],
                initializer,
                activation_fn,
                self.cnn_format,
                name='l1_s')
            self.l2_s, self.w['l2_s_w'], self.w['l2_s_b'] = conv2d(
                self.l1_s,
                64, [4, 4], [2, 2],
                initializer,
                activation_fn,
                self.cnn_format,
                name="l2_s")
            self.l3_s, self.w['l3_s_w'], self.w['l3_s_b'] = conv2d(
                self.l2_s,
                64, [3, 3], [1, 1],
                initializer,
                activation_fn,
                self.cnn_format,
                name="l3_s")
            shape = self.l3_s.get_shape().as_list()
            self.l3_s_flat = tf.reshape(
                self.l3_s, [-1, reduce(lambda x, y: x * y, shape[1:])])
            self.l1_q, self.w['l1_q_w'], self.w['l1_q_b'] = linear(
                self.l3_s_flat, 512, activation_fn=activation_fn, name="l1_q")
            self.ori_q, self.w['l2_q_w'], self.w['l2_q_b'] = linear(
                self.l1_q,
                self.config.option_num + self.config.action_num,
                name='ori_q')

        with tf.variable_scope("qq"):
            self.l1_qq, self.w['l1_qq_w'], self.w['l1_qq_b'] = linear(
                self.l3_s_flat, 512, activation_fn=activation_fn, name="l1_qq")
            self.q, self.w['l2_qq_w'], self.w['l2_qq_b'] = linear(
                self.l1_qq, (self.config.action_num + self.config.option_num) *
                self.config.option_num,
                name='q')

        with tf.variable_scope("target_qq"):
            self.l1_qq_n, self.t_w['l1_qq_w'], self.t_w['l1_qq_b'] = linear(
                self.l3_n_flat, 512, activation_fn=activation_fn, name="l1_qq")
            self.q_n, self.t_w['l2_qq_w'], self.t_w['l2_qq_b'] = linear(
                self.l1_qq_n,
                (self.config.action_num + self.config.option_num) *
                self.config.option_num,
                name='q')

        with tf.variable_scope("parameter"):
            self.k = tf.placeholder("float32", [None], name="k")
            self.terminals = tf.placeholder('float32', [None],
                                            name="terminals")
            self.ep = tf.placeholder("float32", None, name="ep")

        with tf.variable_scope("input"):
            self.o = tf.placeholder("int64", [None], name="o")
            self.b = tf.placeholder("int64", [None], name="b")
            self.g = tf.placeholder('int64', [None], name="g")
        with tf.variable_scope("reward"):
            self.reward_st = tf.placeholder("float32", [None],
                                            name="reward_st")

        with tf.variable_scope("beta"):
            shape = self.residual_state_input_n.get_shape().as_list()
            self.residual_input_n_flat = tf.reshape(
                self.residual_state_input_n,
                [-1, reduce(lambda x, y: x * y, shape[1:])])
            shape = self.state_input_n.get_shape().as_list()
            self.state_input_n_flat = tf.reshape(
                self.state_input_n,
                [-1, reduce(lambda x, y: x * y, shape[1:])])
            self.state_input_n_flat_ = tf.concat(
                1, [self.residual_input_n_flat, 0.1 * self.state_input_n_flat])
            self.beta_na_, self.l1_b_w, self.l1_b_b = linear(
                self.state_input_n_flat_,
                self.config.option_num,
                stddev=0.1,
                activation_fn=tf.nn.sigmoid,
                name="beta")
            #self.beta_na = self.beta_na_
            self.beta_na = tf.select(
                tf.greater(self.beta_na_, self.config.clip_prob),
                tf.ones_like(self.beta_na_, tf.float32),
                tf.zeros_like(self.beta_na_, tf.float32))
            self.beta_ng = tf.reduce_sum(
                tf.mul(self.beta_na,
                       tf.one_hot(self.g, self.config.option_num, 1., 0., -1)),
                1)
        with tf.variable_scope('pred_to_target'):
            self.t_w_input = {}
            self.t_w_assign_op = {}

            for name in self.w.keys():
                self.t_w_input[name] = tf.placeholder(
                    'float32', self.t_w[name].get_shape().as_list(), name=name)
                self.t_w_assign_op[name] = self.t_w[name].assign(
                    self.t_w_input[name])

        with tf.variable_scope("q"):
            # ori_q: Q-values over options + primitive actions
            self.q_na = self.ori_q_n
            self.q_sa = self.ori_q
            self.max_q_n = tf.reduce_max(self.ori_q_n, 1)
            self.q_so = tf.reduce_sum(
                tf.mul(
                    self.q_sa,
                    tf.one_hot(self.o,
                               self.config.option_num + self.config.action_num,
                               1., 0., -1)), 1)
            self.target_q_so = tf.stop_gradient(
                self.reward_st +
                (1 - self.terminals) * self.config.discount**self.k *
                self.max_q_n)
            # goal(option)-conditioned Q-values
            action_num = self.config.action_num + self.config.option_num
            fn = (self.config.option_num) * (self.config.action_num +
                                             self.config.option_num)

            self.q_naa = tf.reshape(self.q_n, [
                -1, (self.config.action_num + self.config.option_num),
                self.config.option_num
            ])
            self.q_nga = tf.reduce_sum(
                tf.mul(
                    self.q_naa,
                    tf.expand_dims(
                        tf.one_hot(self.g, self.config.option_num, 1., 0., -1),
                        1)), 2)

            self.max_q_ng = tf.reduce_max(self.q_nga, 1)
            self.q_saa = tf.reshape(self.q, [
                -1, (self.config.action_num + self.config.option_num),
                self.config.option_num
            ])
            self.q_sga = tf.reduce_sum(
                tf.mul(
                    self.q_saa,
                    tf.expand_dims(
                        tf.one_hot(self.g, self.config.option_num, 1., 0., -1),
                        1)), 2)
            self.q_sgo = tf.reduce_sum(
                tf.mul(
                    self.q,
                    tf.one_hot(self.o * self.config.option_num + self.g, fn,
                               1., 0., -1)), 1)
            self.target_q_sgo = tf.stop_gradient(
                self.reward_st +
                (1 - self.terminals) * self.config.discount**self.k *
                (self.beta_ng * (self.config.goal_pho + self.max_q_n) +
                 (1 - self.beta_ng) * self.max_q_ng))

        with tf.variable_scope("optimizer"):
            self.q_delta = self.target_q_so - self.q_so
            self.qq_delta = self.target_q_sgo - self.q_sgo

            self.q_loss = tf.reduce_mean(tf.square(self.q_delta),
                                         name="q_loss")
            self.qq_loss = tf.reduce_mean(tf.square(self.qq_delta),
                                          name="qq_loss")
            self.learning_rate_op = tf.maximum(
                self.learning_rate_minimum,
                tf.train.exponential_decay(self.learning_rate,
                                           self.learn_count,
                                           self.learning_rate_decay_step,
                                           self.learning_rate_decay,
                                           staircase=True))
            q_optim = tf.train.GradientDescentOptimizer(self.learning_rate_op)
            qq_optim = tf.train.GradientDescentOptimizer(
                self.learning_rate_op * 2)
            self.gvs, self.gvs2 = q_optim.compute_gradients(
                self.q_loss), qq_optim.compute_gradients(self.qq_loss)
            capped_gvs, capped_gvs2 = [
                (tf.clip_by_value(grad, self.min_delta, self.max_delta), var)
                for grad, var in self.gvs if grad is not None
            ], [(tf.clip_by_value(grad, self.min_delta, self.max_delta), var)
                for grad, var in self.gvs2 if grad is not None]
            self.q_optim = q_optim.apply_gradients(capped_gvs)
            self.qq_optim = qq_optim.apply_gradients(capped_gvs2)

            #self.beta_optim = tf.train.GradientDescentOptimizer(self.learning_rate_op).minimize(self.beta_loss)
        '''
        with tf.variable_scope("summary"):
            tags = np.empty((self.config.option_num,
                                             (self.config.action_num+self.config.option_num),
                                             self.config.state_num),dtype="<40U")
            for i in range(tags.shape[0]):
                for j in range(tags.shape[1]):
                    for z in range(tags.shape[2]):
                        tags[i,j,z] = "%s=%s/%s:g:%d-o:%d-s:%d"%(self.env_name, self.env_type, "q", i,j,z)
            self.w_summary = tf.scalar_summary(tags, self.qs)
            self.all_summary = tf.merge_summary([self.learn_count_summary, self.w_summary])
        '''
        with tf.variable_scope("summary"):
            self.all_summary = tf.merge_summary([self.learn_count_summary])
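Example #10 is written against a pre-1.0 TensorFlow: tf.concat takes the axis first, and tf.mul, tf.select, tf.scalar_summary, and tf.merge_summary still exist. Should the snippet need porting, a hedged sketch of the same five calls under their TF 1.x names:

    import tensorflow as tf

    values = [tf.ones([1, 2, 2, 3]), tf.zeros([1, 2, 2, 3])]
    x = tf.concat(values, axis=3)                        # was: tf.concat(3, values)
    y = tf.multiply(x, 2.0)                              # was: tf.mul(x, 2.0)
    z = tf.where(tf.greater(y, 1.0),
                 tf.ones_like(y), tf.zeros_like(y))      # was: tf.select(...)
    s = tf.summary.scalar('z_mean', tf.reduce_mean(z))   # was: tf.scalar_summary
    merged = tf.summary.merge([s])                       # was: tf.merge_summary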