def residual_block3(l, base_dim, increase_dim=False, projection=False):
        if increase_dim:
            layer_1 = batch_norm(ConvLayer(l,
                                           num_filters=base_dim,
                                           filter_size=(1, 1),
                                           stride=(2, 2),
                                           nonlinearity=None,
                                           pad='same',
                                           W=nn.init.HeNormal(gain='relu')))
        else:
            layer_1 = batch_norm(ConvLayer(l,
                                           num_filters=base_dim,
                                           filter_size=(1, 1),
                                           stride=(1, 1),
                                           nonlinearity=rectify,
                                           pad='same',
                                           W=nn.init.HeNormal(gain='relu')))
        layer_2 = batch_norm(ConvLayer(layer_1,
                                       num_filters=base_dim,
                                       filter_size=(3, 3),
                                       stride=(1, 1),
                                       nonlinearity=rectify,
                                       pad='same',
                                       W=nn.init.HeNormal(gain='relu')))
        layer_3 = batch_norm(ConvLayer(layer_2,
                                       num_filters=4*base_dim,
                                       filter_size=(1, 1),
                                       stride=(1, 1),
                                       nonlinearity=rectify,
                                       pad='same',
                                       W=nn.init.HeNormal(gain='relu')))

        # add shortcut connection
        if increase_dim:
            if projection:
                # projection shortcut (option B in paper)
                projection = batch_norm(ConvLayer(l,
                                                  num_filters=4*base_dim,
                                                  filter_size=(1, 1),
                                                  stride=(2, 2),
                                                  nonlinearity=None,
                                                  pad='same',
                                                  b=None))
                block = NonlinearityLayer(ElemwiseSumLayer([layer_3, projection]),
                                          nonlinearity=rectify)
            else:
                # identity shortcut (option A in paper)
                # we use a pooling layer to get identity with strides,
                # since identity layers with stride don't exist in Lasagne
                identity = PoolLayer(l, pool_size=1, stride=(2,2),
                                     mode='average_exc_pad')
                padding = PadLayer(identity, [4*base_dim,0,0], batch_ndim=1)
                block = NonlinearityLayer(ElemwiseSumLayer([layer_3, padding]),
                                          nonlinearity=rectify)

        else:
            block = NonlinearityLayer(ElemwiseSumLayer([layer_3, l]),
                                      nonlinearity=rectify)

        return block
Example #2
def conv_relu(X,
              kernel_shape,
              conv_std,
              bias_val,
              do_batch_norm,
              train_phase,
              add_summary=True):
    weights = tf.get_variable(
        "weights",
        kernel_shape,
        initializer=tf.contrib.layers.xavier_initializer())
    # tf.random_normal_initializer(stddev=conv_std))
    biases = tf.get_variable("biases",
                             kernel_shape[-1],
                             initializer=tf.constant_initializer(bias_val))
    if add_summary:
        with tf.variable_scope('weights'):
            PoseTools.variable_summaries(weights)
            #     PoseTools.variable_summaries(biases)
    conv = tf.nn.conv2d(X, weights, strides=[1, 1, 1, 1], padding='SAME')
    if do_batch_norm:
        conv = batch_norm(conv, train_phase)
    with tf.variable_scope('conv'):
        PoseTools.variable_summaries(conv)
    return tf.nn.relu(conv - biases)
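Because conv_relu creates its variables via tf.get_variable, each call presumably has to live in its own variable scope so the "weights"/"biases" names do not collide. A minimal usage sketch, assuming TF1-style tensors named images and train_phase (illustrative names, not from the original project):

# Hedged usage sketch (TensorFlow 1.x): give every conv_relu call its own variable scope.
with tf.variable_scope('conv1'):
    net = conv_relu(images, kernel_shape=[3, 3, 3, 64], conv_std=0.01,
                    bias_val=0.0, do_batch_norm=True, train_phase=train_phase)
with tf.variable_scope('conv2'):
    net = conv_relu(net, kernel_shape=[3, 3, 64, 64], conv_std=0.01,
                    bias_val=0.0, do_batch_norm=True, train_phase=train_phase)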
Example #3
def nn_bn_layer(input_tensor,
                input_dim,
                output_dim,
                layer_name,
                act,
                is_training,
                sess,
                parForTarget=None):
    """Reusable code for making a simple neural net layer.
    It does a matrix multiply, bias add, and then uses ReLU to nonlinearize.
    It also sets up name scoping so that the resultant graph is easy to read,
    and adds a number of summary ops.
    """
    # Adding a name scope ensures logical grouping of the layers in the graph.
    #input_dim = input_tensor.shape[0]
    with tf.name_scope(layer_name):
        # This Variable will hold the state of the weights for the layer
        with tf.name_scope('weights'):
            weights = weight_variable([input_dim, output_dim])
            variable_summaries(weights)
        with tf.name_scope('pre_bn'):
            PBN = tf.matmul(input_tensor, weights)
            variable_summaries(PBN)
        with tf.name_scope('pre_activation'):
            BN = batch_norm(PBN,
                            output_dim,
                            is_training,
                            sess,
                            parForTarget=parForTarget)
        with tf.name_scope('activation'):
            activations = act(BN.bnorm)
            variable_summaries(activations)

        layer_paras = [weights]
        return activations, layer_paras, [BN]
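A usage sketch of the layer above, assuming a session sess, an is_training placeholder, an input placeholder x, and the weight_variable/variable_summaries/batch_norm helpers referenced in the body are in scope (the dimensions are illustrative):

# Hedged usage sketch: one batch-normalized hidden layer built with nn_bn_layer.
h1, params1, bn1 = nn_bn_layer(x, input_dim=64, output_dim=128,
                               layer_name='layer1', act=tf.nn.relu,
                               is_training=is_training, sess=sess)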
Example #4
def conv_relu_norm_init(X, kernel_shape, conv_std, bias_val, do_batch_norm, train_phase, add_summary=True):
    weights = tf.get_variable("weights", kernel_shape, initializer=tf.random_normal_initializer(stddev=conv_std))
    biases = tf.get_variable("biases", kernel_shape[-1], initializer=tf.constant_initializer(bias_val))
    if add_summary:
        with tf.variable_scope('weights'):
            PoseTools.variable_summaries(weights)
            #     PoseTools.variable_summaries(biases)
    conv = tf.nn.conv2d(X, weights, strides=[1, 1, 1, 1], padding='SAME')
    if do_batch_norm:
        conv = batch_norm(conv, train_phase)
    with tf.variable_scope('conv'):
        PoseTools.variable_summaries(conv)
    return tf.nn.relu(conv - biases)
Example #5
is_training = tf.placeholder(tf.bool, name='is_training')

LUSV = tf.placeholder(tf.float32)
lr = tf.placeholder(tf.float32)

kernel = _variable_with_weight_decay('conv0',
                                     shape=[3, 3, 3, 16],
                                     stddev=np.sqrt(2.0 / 3 / 9),
                                     wd=weight_d)
trainWs.append(kernel)
orthoInit0 = kernel.assign(svd_orthonormal(kernel.get_shape().as_list()))
upd0 = kernel.assign(kernel / LUSV)
ConvLayer0 = tf.nn.conv2d(x, kernel, [1, 1, 1, 1], padding='SAME')
print(ConvLayer0.get_shape().as_list())
if ifbatchnorm:
    net = batch_norm(ConvLayer0, is_training, scope='conv0')
else:
    net = ConvLayer0  # without batch norm, feed the raw conv output to the ReLU
net = tf.nn.relu(net)
if visual:
    _activation_summary(net)
    
resLayer = []
orthoInit = []
kernel_upd = []
for block in range(3):
    blocks = []
    nfilters = 16 << block
    for layer in range(repeat_layer):
        layers = []
        net_copy = net
        for i in range(2):
            name = 'block_%d/layer_%d/conv%d' % (block, layer, i)
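The orthoInit0/upd0 ops above suggest an LSUV-style initialization loop: assign an orthonormal kernel, then repeatedly rescale it by the standard deviation of the layer's output until that std is close to one. A hedged sketch of such a driver, assuming a session sess and a NumPy input batch x_batch (illustrative names):

# Hedged sketch of the LSUV-style correction loop implied by orthoInit0 and upd0.
sess.run(orthoInit0)
for _ in range(10):                                   # a handful of correction steps
    acts = sess.run(ConvLayer0, feed_dict={x: x_batch})
    std = acts.std()
    if abs(std - 1.0) < 0.01:                         # close enough to unit variance
        break
    sess.run(upd0, feed_dict={LUSV: std})             # kernel <- kernel / std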
Example #6
    def __init__(self, num_states, num_actions):

        tf.reset_default_graph()
        self.g = tf.Graph()
        with self.g.as_default():
            self.sess = tf.InteractiveSession()

            # Critic Q Network:
            self.critic_state_in = tf.placeholder("float", [None, num_states])
            self.critic_action_in = tf.placeholder("float", [None, num_actions])
            self.W1_c = tf.Variable(tf.random_uniform(
                [num_states, N_HIDDEN_1], -1 / math.sqrt(num_states), 1 / math.sqrt(num_states)))
            self.B1_c = tf.Variable(tf.random_uniform(
                [N_HIDDEN_1], -1 / math.sqrt(num_states), 1 / math.sqrt(num_states)))
            self.W2_c = tf.Variable(tf.random_uniform(
                [N_HIDDEN_1, N_HIDDEN_2], -1 / math.sqrt(N_HIDDEN_1 + num_actions), 1 / math.sqrt(N_HIDDEN_1 + num_actions)))
            self.B2_c = tf.Variable(tf.random_uniform(
                [N_HIDDEN_2], -1 / math.sqrt(N_HIDDEN_1 + num_actions), 1 / math.sqrt(N_HIDDEN_1 + num_actions)))
            self.W2_action_c = tf.Variable(tf.random_uniform(
                [num_actions, N_HIDDEN_2], -1 / math.sqrt(N_HIDDEN_1 + num_actions), 1 / math.sqrt(N_HIDDEN_1 + num_actions)))
            self.W3_c = tf.Variable(tf.random_uniform([N_HIDDEN_2, 1], -0.003, 0.003))
            self.B3_c = tf.Variable(tf.random_uniform([1], -0.003, 0.003))

            self.is_training = tf.placeholder(tf.bool, [])
            self.H1_t = tf.matmul(self.critic_state_in, self.W1_c)
            self.H1_c_bn = batch_norm(self.H1_t, N_HIDDEN_1, self.is_training, self.sess)

            self.H1_c = tf.nn.softplus(self.H1_c_bn.bnorm) + self.B1_c

            self.H2_t = tf.matmul(self.H1_c, self.W2_c) + \
                tf.matmul(self.critic_action_in, self.W2_action_c)
            self.H2_c_bn = batch_norm(self.H2_t, N_HIDDEN_2, self.is_training, self.sess)
            self.H2_c = tf.nn.tanh(self.H2_c_bn.bnorm) + self.B2_c

            self.critic_q_model = tf.matmul(self.H2_c, self.W3_c) + self.B3_c

            # Target Critic Q Network:
            self.t_critic_state_in = tf.placeholder("float", [None, num_states])
            self.t_critic_action_in = tf.placeholder("float", [None, num_actions])
            self.t_W1_c = tf.Variable(tf.random_uniform(
                [num_states, N_HIDDEN_1], -1 / math.sqrt(num_states), 1 / math.sqrt(num_states)))
            self.t_B1_c = tf.Variable(tf.random_uniform(
                [N_HIDDEN_1], -1 / math.sqrt(num_states), 1 / math.sqrt(num_states)))
            self.t_W2_c = tf.Variable(tf.random_uniform(
                [N_HIDDEN_1, N_HIDDEN_2], -1 / math.sqrt(N_HIDDEN_1 + num_actions), 1 / math.sqrt(N_HIDDEN_1 + num_actions)))
            self.t_W2_action_c = tf.Variable(tf.random_uniform(
                [num_actions, N_HIDDEN_2], -1 / math.sqrt(N_HIDDEN_1 + num_actions), 1 / math.sqrt(N_HIDDEN_1 + num_actions)))
            self.t_B2_c = tf.Variable(tf.random_uniform(
                [N_HIDDEN_2], -1 / math.sqrt(N_HIDDEN_1 + num_actions), 1 / math.sqrt(N_HIDDEN_1 + num_actions)))
            self.t_W3_c = tf.Variable(tf.random_uniform([N_HIDDEN_2, 1], -0.003, 0.003))
            self.t_B3_c = tf.Variable(tf.random_uniform([1], -0.003, 0.003))

            self.t_H1_t = tf.matmul(self.t_critic_state_in, self.t_W1_c)
            self.t_H1_c_bn = batch_norm(self.t_H1_t, N_HIDDEN_1,
                                        self.is_training, self.sess, self.H1_c_bn)
            self.t_H1_c = tf.nn.softplus(self.t_H1_c_bn.bnorm) + self.t_B1_c

            self.t_H2_t = tf.matmul(self.t_H1_c, self.t_W2_c) + \
                tf.matmul(self.t_critic_action_in, self.t_W2_action_c)
            self.t_H2_c_bn = batch_norm(self.t_H2_t, N_HIDDEN_2,
                                        self.is_training, self.sess, self.H2_c_bn)
            self.t_H2_c = tf.nn.tanh(self.t_H2_c_bn.bnorm) + self.t_B2_c

            self.t_critic_q_model = tf.matmul(self.t_H2_c, self.t_W3_c) + self.t_B3_c

            self.q_value_in = tf.placeholder("float", [None, 1])  # supervisor
            #self.l2_regularizer_loss = tf.nn.l2_loss(self.W1_c)+tf.nn.l2_loss(self.W2_c)+ tf.nn.l2_loss(self.W2_action_c) + tf.nn.l2_loss(self.W3_c)+tf.nn.l2_loss(self.B1_c)+tf.nn.l2_loss(self.B2_c)+tf.nn.l2_loss(self.B3_c)
            self.l2_regularizer_loss = 0.0001 * tf.reduce_sum(tf.pow(self.W2_c, 2))
            self.cost = tf.pow(self.critic_q_model - self.q_value_in, 2) / BATCH_SIZE + \
                self.l2_regularizer_loss  # /tf.to_float(tf.shape(self.q_value_in)[0])
            self.optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(self.cost)
            self.act_grad_v = tf.gradients(self.critic_q_model, self.critic_action_in)
            # this is just divided by batch size
            self.action_gradients = [self.act_grad_v[0] /
                                     tf.to_float(tf.shape(self.act_grad_v[0])[0])]
            # from simple actor net:
            self.check_fl = self.action_gradients

            # initialize all tensor variable parameters:
            self.sess.run(tf.global_variables_initializer())

            # To initialize critic and target with the same values:
            # copy target parameters
            self.sess.run([
                self.t_W1_c.assign(self.W1_c),
                self.t_B1_c.assign(self.B1_c),
                self.t_W2_c.assign(self.W2_c),
                self.t_W2_action_c.assign(self.W2_action_c),
                self.t_B2_c.assign(self.B2_c),
                self.t_W3_c.assign(self.W3_c),
                self.t_B3_c.assign(self.B3_c)
            ])
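This critic only copies the target parameters once at construction; DDPG-style training typically also maintains a soft ("Polyak") update of the target network after every training step. A hedged sketch of what that op list would look like for the variables above, as it would appear inside __init__ (TAU is an assumed small mixing factor, not part of this snippet):

            # Hedged sketch: soft update of the target critic, not shown in the snippet above.
            TAU = 0.001
            self.update_target_critic_op = [
                self.t_W1_c.assign(TAU * self.W1_c + (1 - TAU) * self.t_W1_c),
                self.t_B1_c.assign(TAU * self.B1_c + (1 - TAU) * self.t_B1_c),
                self.t_W2_c.assign(TAU * self.W2_c + (1 - TAU) * self.t_W2_c),
                self.t_W2_action_c.assign(TAU * self.W2_action_c + (1 - TAU) * self.t_W2_action_c),
                self.t_B2_c.assign(TAU * self.B2_c + (1 - TAU) * self.t_B2_c),
                self.t_W3_c.assign(TAU * self.W3_c + (1 - TAU) * self.t_W3_c),
                self.t_B3_c.assign(TAU * self.B3_c + (1 - TAU) * self.t_B3_c),
            ]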
Example #7
def build_model(input_var):

    layers = []

    input_layer = nn.layers.InputLayer(
            shape=(batch_size, num_channels, input_width, input_height),
            input_var=input_var,
            name='inputs'
    )
    layers.append(input_layer)

    conv_1 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=32, filter_size=(7, 7), stride=(2, 2),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_1)

    pool_1 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=32, filter_size=(3, 3), stride=(2, 2),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
                         #MaxPool2DLayer(layers[-1], pool_size=(3, 3), stride=(2, 2))
    layers.append(pool_1)

    conv_2 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=32, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_2)

    conv_3 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=32, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_3)

    #pool_2 = MaxPool2DLayer(layers[-1], pool_size=(3, 3), stride=(2, 2))
    pool_2 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=32, filter_size=(3, 3), stride=(2, 2),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(pool_2)

    conv_4 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=64, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_4)

    conv_5 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=64, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_5)

    #pool_3 = MaxPool2DLayer(layers[-1], pool_size=(3, 3), stride=(2, 2))
    pool_3 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=64, filter_size=(3, 3), stride=(2, 2),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(pool_3)

    conv_6 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=128, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_6)

    conv_7 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=128, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_7)

    conv_8 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=128, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_8)

    conv_9 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=128, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_9)

    #pool_4 = MaxPool2DLayer(layers[-1], pool_size=(3, 3), stride=(2, 2))
    pool_4 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=128, filter_size=(3, 3), stride=(2, 2),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(pool_4)

    conv_10 = batch_norm(Conv2DLayer(layers[-1],
                          num_filters=256, filter_size=(3, 3), stride=(1, 1),
                          pad='same',
                          nonlinearity=LeakyRectify(leakiness),
                          W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                          untie_biases=True))
    layers.append(conv_10)

    conv_11 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=256, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_11)

    conv_12 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=256, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_12)

    conv_13 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=256, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_13)

    #pool_5 = MaxPool2DLayer(layers[-1], pool_size=(3, 3), stride=(2, 2))
    pool_5 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=256, filter_size=(3, 3), stride=(2, 2),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(pool_5)

    drop_1 = nn.layers.DropoutLayer(layers[-1], p=0.5)
    layers.append(drop_1)

    fc_1 = DenseLayer(layers[-1],
                      num_units=1024,
                      nonlinearity=None,
                      W=nn.init.Orthogonal(1.0),
                      b=nn.init.Constant(0.1))
    layers.append(fc_1)

    pool_6 = nn.layers.FeaturePoolLayer(layers[-1],
                                        pool_size=2,
                                        pool_function=T.max)
    layers.append(pool_6)

    merge_eyes = nn.layers.ReshapeLayer(layers[-1], shape=(batch_size // 2, -1))
    layers.append(merge_eyes)

    drop_2 = nn.layers.DropoutLayer(layers[-1], p=0.5)
    layers.append(drop_2)

    fc_2 = DenseLayer(layers[-1],
                      num_units=1024,
                      nonlinearity=None,
                      W=nn.init.Orthogonal(1.0),
                      b=nn.init.Constant(0.1))
    layers.append(fc_2)

    pool_7 = nn.layers.FeaturePoolLayer(layers[-1],
                                        pool_size=2,
                                        pool_function=T.max)
    layers.append(pool_7)

    drop_3 = nn.layers.DropoutLayer(layers[-1], p=0.5)
    layers.append(drop_3)

    fc_3 = DenseLayer(layers[-1],
                     num_units=output_dim * 2,
                     nonlinearity=None,
                     W=nn.init.Orthogonal(1.0),
                     b=nn.init.Constant(0.1))
    layers.append(fc_3)

    split_eyes = nn.layers.ReshapeLayer(layers[-1],
                                        shape=(batch_size, 5))
    layers.append(split_eyes)

    softmax_layer = nn.layers.NonlinearityLayer(layers[-1],
                                         nonlinearity=nn.nonlinearities.softmax)

    layers.append(softmax_layer)

    return input_layer, softmax_layer
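For completeness, a minimal sketch of how the layers returned by this build_model could be compiled into a Theano training function. The target variable, learning rate, and momentum are illustrative assumptions, not taken from the original project; batch_size, num_channels and the other globals the builder reads are assumed to be defined:

# Hedged sketch (Lasagne/Theano): compile the model returned by build_model above.
import theano
import theano.tensor as T
import lasagne

input_var = T.tensor4('inputs')
target_var = T.ivector('targets')
input_layer, output_layer = build_model(input_var)

predictions = lasagne.layers.get_output(output_layer)
loss = lasagne.objectives.categorical_crossentropy(predictions, target_var).mean()
params = lasagne.layers.get_all_params(output_layer, trainable=True)
updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)
train_fn = theano.function([input_var, target_var], loss, updates=updates)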
Example #8
    def __init__(self, state_size, action_size, TAU=0.001, write_sum=0):
        tf.reset_default_graph()
        self.counter = 0
        self.write_sum = write_sum  # whether to write the summary file
        ntanh = lambda x, name=[]: (tf.nn.tanh(x) + 1) / 2
        self.activations = [tf.nn.softplus, tf.nn.relu, ntanh]
        self.g = tf.Graph()
        with self.g.as_default():
            self.sess = tf.InteractiveSession()

            #actor network model parameters:
            self.state = tf.placeholder("float32", [None, state_size],
                                        name="state")
            self.is_training = tf.placeholder(tf.bool, [], name="is_training")

            with tf.name_scope("Layer_1"):
                with tf.name_scope('weights'):
                    self.W1 = tf.Variable(
                        tf.random_uniform([state_size, N_HIDDEN_1],
                                          -1 / math.sqrt(state_size),
                                          1 / math.sqrt(state_size)))
                    DNN.variable_summaries(self.W1)
                with tf.name_scope('biases'):
                    self.B1 = tf.Variable(
                        tf.random_uniform([N_HIDDEN_1],
                                          -1 / math.sqrt(state_size),
                                          1 / math.sqrt(state_size)))
                    DNN.variable_summaries(self.B1)
                with tf.name_scope('pre_bn'):
                    self.PBN1 = tf.matmul(self.state, self.W1)
                    DNN.variable_summaries(self.PBN1)
                with tf.name_scope('pre_activation'):
                    self.BN1 = batch_norm(self.PBN1, N_HIDDEN_1,
                                          self.is_training, self.sess)
                    DNN.variable_summaries(self.BN1.bnorm)
                with tf.name_scope('activation'):
                    self.A1 = self.activations[0](self.BN1.bnorm)  # + self.B1
                    DNN.variable_summaries(self.A1)

            with tf.name_scope("Layer_2"):
                with tf.name_scope('weights'):
                    self.W2 = tf.Variable(
                        tf.random_uniform([N_HIDDEN_1, N_HIDDEN_2],
                                          -1 / math.sqrt(N_HIDDEN_1),
                                          1 / math.sqrt(N_HIDDEN_1)))
                    DNN.variable_summaries(self.W2)
                with tf.name_scope('biases'):
                    self.B2 = tf.Variable(
                        tf.random_uniform([N_HIDDEN_2],
                                          -1 / math.sqrt(N_HIDDEN_1),
                                          1 / math.sqrt(N_HIDDEN_1)))
                    DNN.variable_summaries(self.B2)
                with tf.name_scope('pre_bn'):
                    self.PBN2 = tf.matmul(self.A1, self.W2)
                    DNN.variable_summaries(self.PBN2)
                with tf.name_scope('pre_activation'):
                    self.BN2 = batch_norm(self.PBN2, N_HIDDEN_2,
                                          self.is_training, self.sess)
                    DNN.variable_summaries(self.BN2.bnorm)
                with tf.name_scope('activation'):
                    self.A2 = self.activations[1](self.BN2.bnorm)  # + self.B2
                    DNN.variable_summaries(self.A2)

            with tf.name_scope("Output_layer"):
                with tf.name_scope('weights'):
                    self.W3 = tf.Variable(
                        tf.random_uniform([N_HIDDEN_2, action_size], -0.003,
                                          0.003))
                    DNN.variable_summaries(self.W3)
                with tf.name_scope('biases'):
                    self.B3 = tf.Variable(
                        tf.random_uniform([action_size], -0.003, 0.003))
                    DNN.variable_summaries(self.B3)
                with tf.name_scope('activation'):
                    self.action = self.activations[2](
                        tf.matmul(self.A2, self.W3) + self.B3)
                    DNN.variable_summaries(self.action)

            #target actor network model parameters:
            self.t_state = tf.placeholder("float32", [None, state_size],
                                          name="t_state")

            with tf.name_scope("T_Layer_1"):
                with tf.name_scope('weights'):
                    self.t_W1 = tf.Variable(
                        tf.random_uniform([state_size, N_HIDDEN_1],
                                          -1 / math.sqrt(state_size),
                                          1 / math.sqrt(state_size)))
                with tf.name_scope('biases'):
                    self.t_B1 = tf.Variable(
                        tf.random_uniform([N_HIDDEN_1],
                                          -1 / math.sqrt(state_size),
                                          1 / math.sqrt(state_size)))
                with tf.name_scope('pre_bn'):
                    self.t_PBN1 = tf.matmul(self.t_state, self.t_W1)
                with tf.name_scope('pre_activation'):
                    self.t_BN1 = batch_norm(self.t_PBN1, N_HIDDEN_1,
                                            self.is_training, self.sess,
                                            self.BN1)
                with tf.name_scope('activation'):
                    self.t_A1 = self.activations[0](
                        self.t_BN1.bnorm)  # + self.t_B1

            with tf.name_scope("T_Layer_2"):
                with tf.name_scope('weights'):
                    self.t_W2 = tf.Variable(
                        tf.random_uniform([N_HIDDEN_1, N_HIDDEN_2],
                                          -1 / math.sqrt(N_HIDDEN_1),
                                          1 / math.sqrt(N_HIDDEN_1)))
                with tf.name_scope('biases'):
                    self.t_B2 = tf.Variable(
                        tf.random_uniform([N_HIDDEN_2],
                                          -1 / math.sqrt(N_HIDDEN_1),
                                          1 / math.sqrt(N_HIDDEN_1)))
                with tf.name_scope('pre_bn'):
                    self.t_PBN2 = tf.matmul(self.t_A1, self.t_W2)
                with tf.name_scope('pre_activation'):
                    self.t_BN2 = batch_norm(self.t_PBN2, N_HIDDEN_2,
                                            self.is_training, self.sess,
                                            self.BN2)
                with tf.name_scope('activation'):
                    self.t_A2 = self.activations[1](
                        self.t_BN2.bnorm)  # + self.t_B2

            with tf.name_scope("T_Output_layer"):
                with tf.name_scope('weights'):
                    self.t_W3 = tf.Variable(
                        tf.random_uniform([N_HIDDEN_2, action_size], -0.003,
                                          0.003))
                with tf.name_scope('biases'):
                    self.t_B3 = tf.Variable(
                        tf.random_uniform([action_size], -0.003, 0.003))
                with tf.name_scope('activation'):
                    self.t_action = self.activations[2](
                        tf.matmul(self.t_A2, self.t_W3) + self.t_B3)

            self.learning_rate = tf.placeholder("float32",
                                                shape=[],
                                                name="learning_rate")
            self.obj_action = tf.placeholder("float32", [None, action_size],
                                             name="obj_action")
            self.q_gradient_input = tf.placeholder(
                "float32", [None, action_size], name="q_gradient_input"
            )  #gets input from action_gradient computed in critic network file

            #cost of actor network:
            with tf.name_scope('cost'):
                self.cost = tf.reduce_mean(
                    tf.square(tf.round(self.action) - self.obj_action))
            self.actor_parameters = [
                self.W1, self.B1, self.W2, self.B2, self.W3, self.B3,
                self.BN1.scale, self.BN1.beta, self.BN2.scale, self.BN2.beta
            ]
            self.parameters_gradients = tf.gradients(
                self.action, self.actor_parameters, -self.q_gradient_input
            )  #/BATCH_SIZE) changed -self.q_gradient to -
            self.optimizer = tf.train.AdamOptimizer(
                self.learning_rate).apply_gradients(
                    zip(self.parameters_gradients, self.actor_parameters))

            #initialize all tensor variable parameters:
            self.sess.run(tf.global_variables_initializer())

            # To make sure actor and target start with the same initial parameters, copy the parameters:
            # copy target parameters
            self.sess.run([
                self.t_W1.assign(self.W1),
                self.t_B1.assign(self.B1),
                self.t_W2.assign(self.W2),
                self.t_B2.assign(self.B2),
                self.t_W3.assign(self.W3),
                self.t_B3.assign(self.B3)
            ])

            self.update_target_actor_op = [
                self.t_W1.assign(TAU * self.W1 + (1 - TAU) * self.t_W1),
                self.t_B1.assign(TAU * self.B1 + (1 - TAU) * self.t_B1),
                self.t_W2.assign(TAU * self.W2 + (1 - TAU) * self.t_W2),
                self.t_B2.assign(TAU * self.B2 + (1 - TAU) * self.t_B2),
                self.t_W3.assign(TAU * self.W3 + (1 - TAU) * self.t_W3),
                self.t_B3.assign(TAU * self.B3 + (1 - TAU) * self.t_B3),
                self.t_BN1.updateTarget, self.t_BN2.updateTarget
            ]

            net_path = "./model/an"
            self.saver = tf.train.Saver()
            self.saver.save(self.sess, net_path + "/net.ckpt")
            writer = tf.summary.FileWriter(net_path)
            writer.add_graph(self.sess.graph)
            writer.close()
            self.merged = tf.summary.merge_all()
            self.train_writer = tf.summary.FileWriter(net_path,
                                                      self.sess.graph)
            self.run_metadata = tf.RunMetadata()
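A hedged sketch of how one training step on this actor would typically look inside an update method of the same class. state_batch and q_gradient_batch are assumed to come from the replay buffer and the critic's action_gradients, and the learning rate is illustrative:

            # Hedged usage sketch: one DDPG-style actor update followed by the soft target update.
            self.sess.run(self.optimizer, feed_dict={
                self.state: state_batch,
                self.q_gradient_input: q_gradient_batch,
                self.learning_rate: 1e-4,
                self.is_training: True,
            })
            self.sess.run(self.update_target_actor_op,
                          feed_dict={self.is_training: False})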
Example #9
def build_model(input_var):

    # Three layer residual block
    def residual_block3(l, base_dim, increase_dim=False, projection=False):
        if increase_dim:
            layer_1 = batch_norm(ConvLayer(l,
                                           num_filters=base_dim,
                                           filter_size=(1, 1),
                                           stride=(2, 2),
                                           nonlinearity=None,
                                           pad='same',
                                           W=nn.init.HeNormal(gain='relu')))
        else:
            layer_1 = batch_norm(ConvLayer(l,
                                           num_filters=base_dim,
                                           filter_size=(1, 1),
                                           stride=(1, 1),
                                           nonlinearity=rectify,
                                           pad='same',
                                           W=nn.init.HeNormal(gain='relu')))
        layer_2 = batch_norm(ConvLayer(layer_1,
                                       num_filters=base_dim,
                                       filter_size=(3, 3),
                                       stride=(1, 1),
                                       nonlinearity=rectify,
                                       pad='same',
                                       W=nn.init.HeNormal(gain='relu')))
        layer_3 = batch_norm(ConvLayer(layer_2,
                                       num_filters=4*base_dim,
                                       filter_size=(1, 1),
                                       stride=(1, 1),
                                       nonlinearity=rectify,
                                       pad='same',
                                       W=nn.init.HeNormal(gain='relu')))

        # add shortcut connection
        if increase_dim:
            if projection:
                # projection shortcut (option B in paper)
                projection = batch_norm(ConvLayer(l,
                                                  num_filters=4*base_dim,
                                                  filter_size=(1, 1),
                                                  stride=(2, 2),
                                                  nonlinearity=None,
                                                  pad='same',
                                                  b=None))
                block = NonlinearityLayer(ElemwiseSumLayer([layer_3, projection]),
                                          nonlinearity=rectify)
            else:
                # identity shortcut (option A in paper)
                # we use a pooling layer to get identity with strides,
                # since identity layers with stride don't exist in Lasagne
                identity = PoolLayer(l, pool_size=1, stride=(2,2),
                                     mode='average_exc_pad')
                padding = PadLayer(identity, [4*base_dim,0,0], batch_ndim=1)
                block = NonlinearityLayer(ElemwiseSumLayer([layer_3, padding]),
                                          nonlinearity=rectify)

        else:
            block = NonlinearityLayer(ElemwiseSumLayer([layer_3, l]),
                                      nonlinearity=rectify)

        return block

    # Input of the network
    input_layer = InputLayer(shape=(batch_size,
                                    num_channels,
                                    input_height,
                                    input_width),
                             input_var=input_var)

    # Very first conv layer
    l = batch_norm(ConvLayer(input_layer,
                             num_filters=64,
                             filter_size=(7, 7),
                             stride=(2, 2),
                             nonlinearity=rectify,
                             pad='same',
                             W=nn.init.HeNormal(gain='relu')))

    # Maxpool layer
    l = MaxPoolLayer(l, pool_size=(3, 3), stride=(2, 2))

    # Convolve with a 1x1 filter to match the channel dimension to the upcoming residual block
    l = batch_norm(ConvLayer(l,
                             num_filters=256,
                             filter_size=(1, 1),
                             stride=(1, 1),
                             nonlinearity=rectify,
                             pad='same',
                             W=nn.init.HeNormal(gain='relu')))

    ############# First residual blocks #############
    for _ in range(num_blocks[0] - 1):
        l = residual_block3(l, base_dim=64)

    ############# Second residual blocks ############
    # Increment Dimension
    l = residual_block3(l, base_dim=128, increase_dim=True, projection=True)
    for _ in range(num_blocks[1] - 1):
        l = residual_block3(l, base_dim=128)

    ############# Third residual blocks #############
    # Increment Dimension
    l = residual_block3(l, base_dim=256, increase_dim=True, projection=True)
    for _ in range(num_blocks[2] - 1):
        l = residual_block3(l, base_dim=256)

    ############# Fourth residual blocks #############
    # Increment Dimension
    l = residual_block3(l, base_dim=512, increase_dim=True, projection=True)
    for _ in range(num_blocks[3] - 1):
        l = residual_block3(l, base_dim=512)

    # Global pooling layer
    l = GlobalPoolLayer(l)

    # Softmax Layer
    softmax_layer = DenseLayer(l, num_units=output_dim,
                         W=nn.init.HeNormal(),
                         nonlinearity=softmax)

    return softmax_layer
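This builder pulls batch_size, num_channels, input_height, input_width, num_blocks and output_dim from the enclosing scope. A hedged sketch of a ResNet-50-like configuration for it; every value below is an illustrative assumption, and theano.tensor is assumed imported as T:

# Hedged sketch: module-level values the build_model above expects.
batch_size, num_channels, input_height, input_width = 32, 3, 224, 224
num_blocks = [3, 4, 6, 3]          # bottleneck blocks per stage
output_dim = 1000                  # number of classes

input_var = T.tensor4('inputs')
softmax_layer = build_model(input_var)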
Example #10
def build_model(input_var):

    layers = []

    input_layer = nn.layers.InputLayer(
            shape=(None, num_channels, input_width, input_height),
            input_var=input_var,
            name='inputs'
    )
    layers.append(input_layer)

    conv_1 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=32, filter_size=(7, 7), stride=(2, 2),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_1)

    pool_1 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=32, filter_size=(3, 3), stride=(2, 2),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
                         #MaxPool2DLayer(layers[-1], pool_size=(3, 3), stride=(2, 2))
    layers.append(pool_1)

    conv_2 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=32, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_2)

    conv_3 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=32, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_3)

    #pool_2 = MaxPool2DLayer(layers[-1], pool_size=(3, 3), stride=(2, 2))
    pool_2 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=32, filter_size=(3, 3), stride=(2, 2),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(pool_2)

    conv_4 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=64, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_4)

    conv_5 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=64, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_5)

    #pool_3 = MaxPool2DLayer(layers[-1], pool_size=(3, 3), stride=(2, 2))
    pool_3 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=64, filter_size=(3, 3), stride=(2, 2),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(pool_3)

    conv_6 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=128, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_6)

    conv_7 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=128, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_7)

    conv_8 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=128, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_8)

    conv_9 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=128, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_9)

    #pool_4 = MaxPool2DLayer(layers[-1], pool_size=(3, 3), stride=(2, 2))
    pool_4 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=128, filter_size=(3, 3), stride=(2, 2),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(pool_4)

    conv_10 = batch_norm(Conv2DLayer(layers[-1],
                          num_filters=256, filter_size=(3, 3), stride=(1, 1),
                          pad='same',
                          nonlinearity=LeakyRectify(leakiness),
                          W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                          untie_biases=True))
    layers.append(conv_10)

    conv_11 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=256, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_11)

    conv_12 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=256, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_12)

    conv_13 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=256, filter_size=(3, 3), stride=(1, 1),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(conv_13)

    #pool_5 = MaxPool2DLayer(layers[-1], pool_size=(3, 3), stride=(2, 2))
    pool_5 = batch_norm(Conv2DLayer(layers[-1],
                         num_filters=256, filter_size=(3, 3), stride=(2, 2),
                         pad='same',
                         nonlinearity=LeakyRectify(leakiness),
                         W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                         untie_biases=True))
    layers.append(pool_5)

    drop_1 = nn.layers.DropoutLayer(layers[-1], p=0.5)
    layers.append(drop_1)

    fc_1 = DenseLayer(layers[-1],
                      num_units=2048,
                      nonlinearity=LeakyRectify(leakiness),
                      W=nn.init.Orthogonal(1.0),
                      b=nn.init.Constant(0.1))
    layers.append(fc_1)

    pool_6 = nn.layers.FeaturePoolLayer(layers[-1],
                                        pool_size=2,
                                        pool_function=T.max)
    layers.append(pool_6)

    drop_2 = nn.layers.DropoutLayer(layers[-1], p=0.5)
    layers.append(drop_2)

    fc_2 = DenseLayer(layers[-1],
                      num_units=1024,
                      nonlinearity=LeakyRectify(leakiness),
                      W=nn.init.Orthogonal(1.0),
                      b=nn.init.Constant(0.1))
    layers.append(fc_2)

    pool_7 = nn.layers.FeaturePoolLayer(layers[-1],
                                        pool_size=2,
                                        pool_function=T.max)
    layers.append(pool_7)

    drop_3 = nn.layers.DropoutLayer(layers[-1], p=0.5)
    layers.append(drop_3)

    softmax_layer = DenseLayer(drop_3,
                      num_units=output_dim,
                      nonlinearity=softmax)
    layers.append(softmax_layer)

    regression_layer = DenseLayer(drop_3,
                        num_units=1,
                        nonlinearity=identity)
    layers.append(regression_layer)

    output = nn.layers.merge.ConcatLayer([softmax_layer, regression_layer])
    layers.append(output)

    return input_layer, output
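Since the final ConcatLayer stacks the output_dim softmax probabilities and the single regression value side by side, the two heads presumably have to be sliced apart again after get_output. A hedged sketch, assuming lasagne is imported and output/output_dim refer to the names used in the builder above:

# Hedged sketch: slice the concatenated model output back into its two heads.
out = lasagne.layers.get_output(output, deterministic=True)
class_probs = out[:, :output_dim]       # softmax head (output_dim units)
regression = out[:, output_dim:]        # single-unit regression head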
Example #11
    def residual_block3(l, base_dim, increase_dim=False, projection=False):
        if increase_dim:
            layer_1 = batch_norm(
                ConvLayer(l,
                          num_filters=base_dim,
                          filter_size=(1, 1),
                          stride=(2, 2),
                          nonlinearity=None,
                          pad='same',
                          W=nn.init.HeNormal(gain='relu')))
        else:
            layer_1 = batch_norm(
                ConvLayer(l,
                          num_filters=base_dim,
                          filter_size=(1, 1),
                          stride=(1, 1),
                          nonlinearity=rectify,
                          pad='same',
                          W=nn.init.HeNormal(gain='relu')))
        layer_2 = batch_norm(
            ConvLayer(layer_1,
                      num_filters=base_dim,
                      filter_size=(3, 3),
                      stride=(1, 1),
                      nonlinearity=rectify,
                      pad='same',
                      W=nn.init.HeNormal(gain='relu')))
        layer_3 = batch_norm(
            ConvLayer(layer_2,
                      num_filters=4 * base_dim,
                      filter_size=(1, 1),
                      stride=(1, 1),
                      nonlinearity=rectify,
                      pad='same',
                      W=nn.init.HeNormal(gain='relu')))

        # add shortcut connection
        if increase_dim:
            if projection:
                # projection shortcut (option B in paper)
                projection = batch_norm(
                    ConvLayer(l,
                              num_filters=4 * base_dim,
                              filter_size=(1, 1),
                              stride=(2, 2),
                              nonlinearity=None,
                              pad='same',
                              b=None))
                block = NonlinearityLayer(ElemwiseSumLayer(
                    [layer_3, projection]),
                                          nonlinearity=rectify)
            else:
                # identity shortcut (option A in paper)
                # we use a pooling layer to get identity with strides,
                # since identity layers with stride don't exist in Lasagne
                identity = PoolLayer(l,
                                     pool_size=1,
                                     stride=(2, 2),
                                     mode='average_exc_pad')
                padding = PadLayer(identity, [4 * base_dim, 0, 0],
                                   batch_ndim=1)
                block = NonlinearityLayer(ElemwiseSumLayer([layer_3, padding]),
                                          nonlinearity=rectify)

        else:
            block = NonlinearityLayer(ElemwiseSumLayer([layer_3, l]),
                                      nonlinearity=rectify)

        return block
Example #12
def build_model(input_var):

    # Three layer residual block
    def residual_block3(l, base_dim, increase_dim=False, projection=False):
        if increase_dim:
            layer_1 = batch_norm(
                ConvLayer(l,
                          num_filters=base_dim,
                          filter_size=(1, 1),
                          stride=(2, 2),
                          nonlinearity=None,
                          pad='same',
                          W=nn.init.HeNormal(gain='relu')))
        else:
            layer_1 = batch_norm(
                ConvLayer(l,
                          num_filters=base_dim,
                          filter_size=(1, 1),
                          stride=(1, 1),
                          nonlinearity=rectify,
                          pad='same',
                          W=nn.init.HeNormal(gain='relu')))
        layer_2 = batch_norm(
            ConvLayer(layer_1,
                      num_filters=base_dim,
                      filter_size=(3, 3),
                      stride=(1, 1),
                      nonlinearity=rectify,
                      pad='same',
                      W=nn.init.HeNormal(gain='relu')))
        layer_3 = batch_norm(
            ConvLayer(layer_2,
                      num_filters=4 * base_dim,
                      filter_size=(1, 1),
                      stride=(1, 1),
                      nonlinearity=rectify,
                      pad='same',
                      W=nn.init.HeNormal(gain='relu')))

        # add shortcut connection
        if increase_dim:
            if projection:
                # projection shortcut (option B in paper)
                projection = batch_norm(
                    ConvLayer(l,
                              num_filters=4 * base_dim,
                              filter_size=(1, 1),
                              stride=(2, 2),
                              nonlinearity=None,
                              pad='same',
                              b=None))
                block = NonlinearityLayer(ElemwiseSumLayer(
                    [layer_3, projection]),
                                          nonlinearity=rectify)
            else:
                # identity shortcut (option A in paper)
                # we use a pooling layer to get identity with strides,
                # since identity layers with stride don't exist in Lasagne
                identity = PoolLayer(l,
                                     pool_size=1,
                                     stride=(2, 2),
                                     mode='average_exc_pad')
                padding = PadLayer(identity, [4 * base_dim, 0, 0],
                                   batch_ndim=1)
                block = NonlinearityLayer(ElemwiseSumLayer([layer_3, padding]),
                                          nonlinearity=rectify)

        else:
            block = NonlinearityLayer(ElemwiseSumLayer([layer_3, l]),
                                      nonlinearity=rectify)

        return block

    # Input of the network
    input_layer = InputLayer(shape=(batch_size, num_channels, input_height,
                                    input_width),
                             input_var=input_var)

    # Very first conv layer
    l = batch_norm(
        ConvLayer(input_layer,
                  num_filters=64,
                  filter_size=(7, 7),
                  stride=(2, 2),
                  nonlinearity=rectify,
                  pad='same',
                  W=nn.init.HeNormal(gain='relu')))

    # Maxpool layer
    l = MaxPoolLayer(l, pool_size=(3, 3), stride=(2, 2))

    # Convolve with a 1x1 filter to match the channel dimension to the upcoming residual block
    l = batch_norm(
        ConvLayer(l,
                  num_filters=256,
                  filter_size=(1, 1),
                  stride=(1, 1),
                  nonlinearity=rectify,
                  pad='same',
                  W=nn.init.HeNormal(gain='relu')))

    ############# First residual blocks #############
    for _ in range(num_blocks[0] - 1):
        l = residual_block3(l, base_dim=64)

    ############# Second residual blocks ############
    # Increment Dimension
    l = residual_block3(l, base_dim=128, increase_dim=True, projection=True)
    for _ in range(num_blocks[1] - 1):
        l = residual_block3(l, base_dim=128)

    ############# Third residual blocks #############
    # Increment Dimension
    l = residual_block3(l, base_dim=256, increase_dim=True, projection=True)
    for _ in range(num_blocks[2] - 1):
        l = residual_block3(l, base_dim=256)

    ############# Fourth residual blocks #############
    # Increment Dimension
    l = residual_block3(l, base_dim=512, increase_dim=True, projection=True)
    for _ in range(num_blocks[3] - 1):
        l = residual_block3(l, base_dim=512)

    # Global pooling layer
    l = GlobalPoolLayer(l)

    # Softmax Layer
    softmax_layer = DenseLayer(l,
                               num_units=output_dim,
                               W=nn.init.HeNormal(),
                               nonlinearity=softmax)

    return softmax_layer
Example #13
    def __init__(self,
                 state_size,
                 action_size,
                 TAU=0.001,
                 write_sum=0,
                 net_size_scale=1):
        self.counter = 0
        self.state_size = state_size
        self.action_size = action_size
        self.output_size = 1
        self.write_sum = write_sum
        self.input_size = state_size + action_size

        self.hidden_dims = [int(n * net_size_scale) for n in CN_N_HIDDENS]
        self.activations = CN_ACTS
        #print("n hiddens: "+str(self.hidden_dims))
        tf.reset_default_graph()
        self.g = tf.Graph()
        with self.g.as_default():
            self.sess = tf.InteractiveSession()

            #Critic Q Network:
            self.state = tf.placeholder("float32", [None, state_size],
                                        name="state")
            self.action = tf.placeholder("float32", [None, action_size],
                                         name="action")
            self.t_state = tf.placeholder("float32", [None, state_size],
                                          name="t_state")
            self.t_action = tf.placeholder("float32", [None, action_size],
                                           name="t_action")
            self.is_training = tf.placeholder(tf.bool, [], name="is_training")

            # Critic network
            self.A1, self.para1, self.bn_para1 = DNN.nn_bn_layer(
                self.state, self.state_size, self.hidden_dims[0], "Layer_1",
                self.activations[0], self.is_training, self.sess)

            with tf.name_scope("Layer_2"):
                with tf.name_scope('weights'):
                    self.W2 = tf.Variable(
                        tf.random_uniform(
                            [self.hidden_dims[0], self.hidden_dims[1]],
                            -1 / math.sqrt(self.hidden_dims[0] + action_size),
                            1 / math.sqrt(self.hidden_dims[0] + action_size)))
                    DNN.variable_summaries(self.W2)
                with tf.name_scope('biases'):
                    self.B2 = tf.Variable(
                        tf.random_uniform(
                            [self.hidden_dims[1]],
                            -1 / math.sqrt(self.hidden_dims[0] + action_size),
                            1 / math.sqrt(self.hidden_dims[0] + action_size)))
                    DNN.variable_summaries(self.B2)
                with tf.name_scope('action_weights'):
                    self.W2_action = tf.Variable(
                        tf.random_uniform(
                            [action_size, self.hidden_dims[1]],
                            -1 / math.sqrt(self.hidden_dims[0] + action_size),
                            1 / math.sqrt(self.hidden_dims[0] + action_size)))
                    DNN.variable_summaries(self.W2_action)
                with tf.name_scope('pre_bn'):
                    self.PBN2 = tf.matmul(self.A1, self.W2) + tf.matmul(
                        self.action, self.W2_action)
                    DNN.variable_summaries(self.PBN2)
                with tf.name_scope('pre_activation'):
                    self.BN2 = batch_norm(self.PBN2, self.hidden_dims[1],
                                          self.is_training, self.sess)
                    DNN.variable_summaries(self.BN2.bnorm)
                with tf.name_scope('activation'):
                    self.A2 = self.activations[1](self.BN2.bnorm)  #+ self.B2
                    DNN.variable_summaries(self.A2)

            self.Q_value, self.out_paras = DNN.nn_layer(
                self.A2, self.hidden_dims[1], self.output_size, "Output",
                self.activations[2])

            self.parameters = [self.para1[0], self.W2, self.W2_action]
            self.parameters.extend(self.out_paras)
            self.bn_paras = [self.bn_para1[0], self.BN2]

            # Target critic network
            self.t_A1, self.t_para1, self.t_bn_para1 = DNN.nn_bn_layer(
                self.t_state, self.state_size, self.hidden_dims[0],
                "T_Layer_1", self.activations[0], self.is_training, self.sess,
                self.bn_paras[0])

            with tf.name_scope("T_Layer_2"):
                with tf.name_scope('weights'):
                    self.t_W2 = tf.Variable(
                        tf.random_uniform(
                            [self.hidden_dims[0], self.hidden_dims[1]],
                            -1 / math.sqrt(self.hidden_dims[0] + action_size),
                            1 / math.sqrt(self.hidden_dims[0] + action_size)))
                    DNN.variable_summaries(self.t_W2)
                with tf.name_scope('biases'):
                    self.t_B2 = tf.Variable(
                        tf.random_uniform(
                            [self.hidden_dims[1]],
                            -1 / math.sqrt(self.hidden_dims[0] + action_size),
                            1 / math.sqrt(self.hidden_dims[0] + action_size)))
                    DNN.variable_summaries(self.t_B2)
                with tf.name_scope('action_weights'):
                    self.t_W2_action = tf.Variable(
                        tf.random_uniform(
                            [action_size, self.hidden_dims[1]],
                            -1 / math.sqrt(self.hidden_dims[0] + action_size),
                            1 / math.sqrt(self.hidden_dims[0] + action_size)))
                    DNN.variable_summaries(self.t_W2_action)
                with tf.name_scope('pre_bn'):
                    self.t_PBN2 = tf.matmul(self.t_A1, self.t_W2) + tf.matmul(
                        self.t_action, self.t_W2_action)
                    DNN.variable_summaries(self.t_PBN2)
                with tf.name_scope('pre_activation'):
                    self.t_BN2 = batch_norm(self.t_PBN2, self.hidden_dims[1],
                                            self.is_training, self.sess,
                                            self.bn_paras[1])
                    DNN.variable_summaries(self.t_BN2.bnorm)
                with tf.name_scope('activation'):
                    self.t_A2 = self.activations[1](
                        self.t_BN2.bnorm)  #+ self.B2
                    DNN.variable_summaries(self.t_A2)

            self.t_Q_value, self.t_out_paras = DNN.nn_layer(
                self.t_A2, self.hidden_dims[1], self.output_size, "T_Output",
                self.activations[2])

            self.t_parameters = [self.t_para1[0], self.t_W2, self.t_W2_action]
            self.t_parameters.extend(self.t_out_paras)
            self.t_bn_paras = [self.t_bn_para1[0], self.t_BN2]

            self.parameters_name = ["W1", "W2", "W2_action", "W3", "B3"]
            self.bn_parameters_name = ["BN1", "BN2"]
            self.num_paras = len(self.parameters)
            self.num_bn = len(self.bn_paras)

            self.Q_obj = tf.placeholder("float32", [None, 1],
                                        name="obj_Q")  #supervisor
            self.learning_rate = tf.placeholder("float32",
                                                shape=[],
                                                name="learning_rate")

            with tf.name_scope("cost"):
                #self.l2_regularizer_loss = tf.nn.l2_loss(self.W1_state)+tf.nn.l2_loss(self.W2)+ tf.nn.l2_loss(self.W2_action) + tf.nn.l2_loss(self.W3)+tf.nn.l2_loss(self.B1)+tf.nn.l2_loss(self.B2)+tf.nn.l2_loss(self.B3)
                #self.l2_regularizer_loss = 0.0001*tf.reduce_mean( tf.square( self.W2 ) )
                #self.cost                = tf.pow( self.Q_value - self.Q_obj, 2 )/AC_BATCH_SIZE + self.l2_regularizer_loss#/tf.to_float(tf.shape(self.Q_obj)[0])
                self.cost = tf.reduce_mean(tf.square(self.Q_value -
                                                     self.Q_obj))
            with tf.name_scope("Grad_min_cost"):
                self.optimizer = tf.train.AdamOptimizer(
                    self.learning_rate,
                    name="critic_grad").minimize(self.cost, name="min_cost")

            #action gradient to be used in actor network:
            with tf.name_scope("Grad_Q2a"):
                self.act_grad_v = tf.gradients(self.Q_value,
                                               self.action,
                                               name="Grad_to_action")
                self.action_gradients = [
                    self.act_grad_v[0] /
                    tf.to_float(tf.shape(self.act_grad_v[0])[0])
                ]  #this is just divided by batch size

            #initialize all tensor variable parameters:
            self.sess.run(tf.global_variables_initializer())

            self.init_target_op = [
            ]  # To initialize critic and target with the same values: copy target parameters
            self.update_target_critic_op = [
            ]  # Operations to update target network, including BN parameters
            for i in range(self.num_paras):
                self.init_target_op.append(self.t_parameters[i].assign(
                    self.parameters[i]))
                self.update_target_critic_op.append(
                    self.t_parameters[i].assign(TAU * self.parameters[i] +
                                                (1 - TAU) *
                                                self.t_parameters[i]))

            for i in range(self.num_bn):
                self.update_target_critic_op.append(
                    self.t_bn_paras[i].updateTarget)

            self.sess.run(self.init_target_op)

            # operations to be evaluated during training, including apply gradient and update statistics in BN layers
            self.train_op = [self.optimizer]
            for i in range(self.num_bn):
                self.train_op.append(self.bn_paras[i].train_mean)
                self.train_op.append(self.bn_paras[i].train_var)
                self.train_op.append(self.t_bn_paras[i].train_mean)
                self.train_op.append(self.t_bn_paras[i].train_var)

            net_path = "./model/cn"
            if self.write_sum > 0:
                self.saver = tf.train.Saver()
                self.saver.save(self.sess, net_path + "/net.ckpt")
                writer = tf.summary.FileWriter(net_path)
                writer.add_graph(self.sess.graph)
                writer.close()
                self.merged = tf.summary.merge_all()
                self.train_writer = tf.summary.FileWriter(
                    net_path, self.sess.graph)
                self.run_metadata = tf.RunMetadata()
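
The update_target_critic_op list built above implements the DDPG soft (Polyak) update theta_target <- TAU*theta + (1 - TAU)*theta_target. A plain-numpy illustration of that blend (the arrays are made-up values, not the network's parameters):

    import numpy as np

    def soft_update(theta, theta_target, tau=0.001):
        """Blend online parameters into the target parameters (Polyak averaging)."""
        return tau * theta + (1.0 - tau) * theta_target

    theta        = np.array([1.0, 2.0, 3.0])          # online critic weights
    theta_target = np.zeros(3)                        # target critic weights
    theta_target = soft_update(theta, theta_target)   # -> [0.001, 0.002, 0.003]
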
Example #14
0
    def __init__(self, num_states, num_actions):
        tf.reset_default_graph()
        self.g = tf.Graph()
        with self.g.as_default():
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            self.sess = tf.Session(config=config)

            # actor network model parameters:
            self.actor_state_in = tf.placeholder("float", [None, num_states])
            self.W1_a = tf.Variable(
                tf.random_uniform([num_states, N_HIDDEN_1],
                                  -1 / math.sqrt(num_states),
                                  1 / math.sqrt(num_states)))
            self.B1_a = tf.Variable(
                tf.random_uniform([N_HIDDEN_1], -1 / math.sqrt(num_states),
                                  1 / math.sqrt(num_states)))
            self.W2_a = tf.Variable(
                tf.random_uniform([N_HIDDEN_1, N_HIDDEN_2],
                                  -1 / math.sqrt(N_HIDDEN_1),
                                  1 / math.sqrt(N_HIDDEN_1)))
            self.B2_a = tf.Variable(
                tf.random_uniform([N_HIDDEN_2], -1 / math.sqrt(N_HIDDEN_1),
                                  1 / math.sqrt(N_HIDDEN_1)))
            self.W3_a = tf.Variable(
                tf.random_uniform([N_HIDDEN_2, num_actions], -0.003, 0.003))
            self.B3_a = tf.Variable(
                tf.random_uniform([num_actions], -0.003, 0.003))

            self.is_training = tf.placeholder(tf.bool, [])
            self.H1_t = tf.matmul(self.actor_state_in, self.W1_a)
            self.H1_a_bn = batch_norm(self.H1_t, N_HIDDEN_1, self.is_training,
                                      self.sess)
            self.H1_a = tf.nn.softplus(self.H1_a_bn.bnorm) + self.B1_a

            self.H2_t = tf.matmul(self.H1_a, self.W2_a)
            self.H2_a_bn = batch_norm(self.H2_t, N_HIDDEN_2, self.is_training,
                                      self.sess)
            self.H2_a = tf.nn.tanh(self.H2_a_bn.bnorm) + self.B2_a
            self.actor_model = tf.matmul(self.H2_a, self.W3_a) + self.B3_a

            # target actor network model parameters:
            self.t_actor_state_in = tf.placeholder("float", [None, num_states])
            self.t_W1_a = tf.Variable(
                tf.random_uniform([num_states, N_HIDDEN_1],
                                  -1 / math.sqrt(num_states),
                                  1 / math.sqrt(num_states)))
            self.t_B1_a = tf.Variable(
                tf.random_uniform([N_HIDDEN_1], -1 / math.sqrt(num_states),
                                  1 / math.sqrt(num_states)))
            self.t_W2_a = tf.Variable(
                tf.random_uniform([N_HIDDEN_1, N_HIDDEN_2],
                                  -1 / math.sqrt(N_HIDDEN_1),
                                  1 / math.sqrt(N_HIDDEN_1)))
            self.t_B2_a = tf.Variable(
                tf.random_uniform([N_HIDDEN_2], -1 / math.sqrt(N_HIDDEN_1),
                                  1 / math.sqrt(N_HIDDEN_1)))
            self.t_W3_a = tf.Variable(
                tf.random_uniform([N_HIDDEN_2, num_actions], -0.003, 0.003))
            self.t_B3_a = tf.Variable(
                tf.random_uniform([num_actions], -0.003, 0.003))

            self.t_is_training = tf.placeholder(tf.bool, [])
            self.t_H1_t = tf.matmul(self.t_actor_state_in, self.t_W1_a)
            self.t_H1_a_bn = batch_norm(self.t_H1_t, N_HIDDEN_1,
                                        self.t_is_training, self.sess,
                                        self.H1_a_bn)
            self.t_H1_a = tf.nn.softplus(self.t_H1_a_bn.bnorm) + self.t_B1_a

            self.t_H2_t = tf.matmul(self.t_H1_a, self.t_W2_a)
            self.t_H2_a_bn = batch_norm(self.t_H2_t, N_HIDDEN_2,
                                        self.t_is_training, self.sess,
                                        self.H2_a_bn)
            self.t_H2_a = tf.nn.tanh(self.t_H2_a_bn.bnorm) + self.t_B2_a
            self.t_actor_model = tf.matmul(self.t_H2_a,
                                           self.t_W3_a) + self.t_B3_a

            # cost of actor network:
            # gets input from action_gradient computed in critic network file
            self.q_gradient_input = tf.placeholder("float",
                                                   [None, num_actions])
            self.actor_parameters = [
                self.W1_a, self.B1_a, self.W2_a, self.B2_a, self.W3_a,
                self.B3_a, self.H1_a_bn.scale, self.H1_a_bn.beta,
                self.H2_a_bn.scale, self.H2_a_bn.beta
            ]
            # weight the parameter gradients by -q_gradient_input (gradient ascent on Q);
            # note: no division by BATCH_SIZE here
            self.parameters_gradients = tf.gradients(self.actor_model,
                                                     self.actor_parameters,
                                                     -self.q_gradient_input)

            self.optimizer = tf.train.AdamOptimizer(
                learning_rate=LEARNING_RATE, epsilon=1e-08).apply_gradients(
                    zip(self.parameters_gradients, self.actor_parameters))
            # initialize all tensor variable parameters:
            self.sess.run(tf.global_variables_initializer())

            # To make sure the actor and target networks start from the same
            # initial parameters, copy them over:
            self.sess.run([
                self.t_W1_a.assign(self.W1_a),
                self.t_B1_a.assign(self.B1_a),
                self.t_W2_a.assign(self.W2_a),
                self.t_B2_a.assign(self.B2_a),
                self.t_W3_a.assign(self.W3_a),
                self.t_B3_a.assign(self.B3_a)
            ])
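
One actor update feeds the critic's dQ/da into q_gradient_input. A hedged sketch, assuming `actor` is an instance of the class above, `critic` is an instance of the critic class from Example #13, and state_batch is a numpy array of shape [N, num_states]:

    # 1) actions from the current policy
    action_batch = actor.sess.run(actor.actor_model,
                                  feed_dict={actor.actor_state_in: state_batch,
                                             actor.is_training: False})
    # 2) dQ/da from the critic, evaluated at (s, a = mu(s))
    dq_da = critic.sess.run(critic.action_gradients,
                            feed_dict={critic.state: state_batch,
                                       critic.action: action_batch,
                                       critic.is_training: False})[0]
    # 3) apply the deterministic policy gradient; the graph already negates
    #    q_gradient_input, so the raw dQ/da is fed in directly
    actor.sess.run(actor.optimizer,
                   feed_dict={actor.actor_state_in: state_batch,
                              actor.q_gradient_input: dq_da,
                              actor.is_training: True})
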
Example #15
0
    def __init__(self, state_size, action_size, TAU=0.001, write_sum=0):
        self.counter     = 0
        self.write_sum   = write_sum
        self.activations = [tf.nn.softplus, tf.nn.relu, lambda x, name=[]: x]

        tf.reset_default_graph()
        self.g = tf.Graph()
        with self.g.as_default():
            self.sess = tf.InteractiveSession()
            
            #Critic Q Network:
            self.state       = tf.placeholder( "float32", [None, state_size],  name="state"  )
            self.action      = tf.placeholder( "float32", [None, action_size], name="action"  ) 
            self.is_training = tf.placeholder( tf.bool,   [],                  name="is_training" )
            
            with tf.name_scope("Layer_1"):
                with tf.name_scope('weights'):
                    self.W1   = tf.Variable( tf.random_uniform( [state_size, N_HIDDEN_1], -1/math.sqrt(state_size), 1/math.sqrt(state_size)))
                    DNN.variable_summaries(self.W1)
                with tf.name_scope('biases'):
                    self.B1   = tf.Variable( tf.random_uniform( [N_HIDDEN_1], -1/math.sqrt(state_size), 1/math.sqrt(state_size)))
                    DNN.variable_summaries(self.B1)
                with tf.name_scope('pre_bn'):
                    self.PBN1 = tf.matmul( self.state, self.W1 )
                    DNN.variable_summaries(self.PBN1)
                with tf.name_scope('pre_activation'):
                    self.BN1  = batch_norm( self.PBN1, N_HIDDEN_1, self.is_training, self.sess )
                    DNN.variable_summaries(self.BN1.bnorm)
                with tf.name_scope('activation'):
                    self.A1   = self.activations[0]( self.BN1.bnorm ) #+ self.B1 
                    DNN.variable_summaries(self.A1)

            with tf.name_scope("Layer_2"):
                with tf.name_scope('weights'):
                    self.W2   = tf.Variable( tf.random_uniform( [N_HIDDEN_1, N_HIDDEN_2],  -1/math.sqrt(N_HIDDEN_1+action_size), 1/math.sqrt(N_HIDDEN_1+action_size)))  
                    DNN.variable_summaries(self.W2)
                with tf.name_scope('biases'):
                    self.B2   = tf.Variable( tf.random_uniform( [N_HIDDEN_2], -1/math.sqrt(N_HIDDEN_1+action_size), 1/math.sqrt(N_HIDDEN_1+action_size)))
                    DNN.variable_summaries(self.B2)
                with tf.name_scope('action_weights'):
                    self.W2_action = tf.Variable( tf.random_uniform( [action_size, N_HIDDEN_2], -1/math.sqrt(N_HIDDEN_1+action_size), 1/math.sqrt(N_HIDDEN_1+action_size)))
                    DNN.variable_summaries(self.W2_action)
                with tf.name_scope('pre_bn'):
                    self.PBN2 = tf.matmul(  self.A1, self.W2) + tf.matmul( self.action, self.W2_action )
                    DNN.variable_summaries(self.PBN2)
                with tf.name_scope('pre_activation'):
                    self.BN2  = batch_norm( self.PBN2, N_HIDDEN_2, self.is_training, self.sess )
                    DNN.variable_summaries(self.BN2.bnorm)
                with tf.name_scope('activation'):
                    self.A2   = self.activations[1]( self.BN2.bnorm ) #+ self.B2
                    DNN.variable_summaries(self.A2)

            with tf.name_scope("Output_layer"):
                with tf.name_scope('weights'):
                    self.W3 = tf.Variable( tf.random_uniform( [N_HIDDEN_2, 1],  -0.003,  0.003 ) )
                    DNN.variable_summaries(self.W3)
                with tf.name_scope('biases'):
                    self.B3 = tf.Variable( tf.random_uniform( [1], -0.003, 0.003 ) )
                    DNN.variable_summaries(self.B3)
                with tf.name_scope('activation'):
                    self.Q_value = self.activations[2]( tf.matmul(  self.A2, self.W3 ) + self.B3 )
                    DNN.variable_summaries( self.Q_value )


            # Target Critic Q Network:
            self.t_state  = tf.placeholder( "float32", [None,state_size] , name="t_state"  )
            self.t_action = tf.placeholder( "float32", [None,action_size], name="t_action" )
            
            with tf.name_scope("T_Layer_1"):
                with tf.name_scope('weights'):
                    self.t_W1   = tf.Variable( tf.random_uniform( [state_size,N_HIDDEN_1], -1/math.sqrt(state_size), 1/math.sqrt(state_size) ) )
                with tf.name_scope('biases'):
                    self.t_B1   = tf.Variable( tf.random_uniform( [N_HIDDEN_1], -1/math.sqrt(state_size), 1/math.sqrt(state_size) ) )
                with tf.name_scope('pre_bn'):    
                    self.t_PBN1 = tf.matmul( self.t_state, self.t_W1 )
                with tf.name_scope('pre_activation'):    
                    self.t_BN1  = batch_norm( self.t_PBN1, N_HIDDEN_1, self.is_training, self.sess, self.BN1 )        
                with tf.name_scope('activation'):    
                    self.t_A1   = self.activations[0]( self.t_BN1.bnorm ) #+ self.t_B1
            
            with tf.name_scope("T_Layer_2"):
                with tf.name_scope('weights'):
                    self.t_W2   = tf.Variable( tf.random_uniform( [N_HIDDEN_1, N_HIDDEN_2],  -1/math.sqrt(N_HIDDEN_1+action_size), 1/math.sqrt(N_HIDDEN_1+action_size) ) )  
                    self.t_W2_action = tf.Variable( tf.random_uniform( [action_size, N_HIDDEN_2], -1/math.sqrt(N_HIDDEN_1+action_size), 1/math.sqrt(N_HIDDEN_1+action_size) ) )
                with tf.name_scope('biases'):
                    self.t_B2   = tf.Variable( tf.random_uniform( [N_HIDDEN_2], -1/math.sqrt(N_HIDDEN_1+action_size), 1/math.sqrt(N_HIDDEN_1+action_size) ) )
                with tf.name_scope('pre_bn'):    
                    self.t_PBN2 = tf.matmul( self.t_A1, self.t_W2) + tf.matmul( self.t_action, self.t_W2_action )
                with tf.name_scope('pre_activation'):    
                    self.t_BN2  = batch_norm( self.t_PBN2, N_HIDDEN_2, self.is_training, self.sess, self.BN2 )
                with tf.name_scope('activation'):    
                    self.t_A2   = self.activations[1]( self.t_BN2.bnorm ) #+ self.t_B2 
            
            with tf.name_scope("T_Output_layer"):
                with tf.name_scope('weights'):
                    self.t_W3 = tf.Variable( tf.random_uniform( [N_HIDDEN_2,1], -0.003, 0.003 ) )
                with tf.name_scope('biases'):
                    self.t_B3 = tf.Variable( tf.random_uniform( [1], -0.003, 0.003 ) )
                with tf.name_scope('activation'):
                    self.t_Q_value = self.activations[2]( tf.matmul( self.t_A2, self.t_W3 ) + self.t_B3 )
            
            
            self.Q_obj         = tf.placeholder( "float32", [None,1], name="obj_Q" ) #supervisor
            self.learning_rate = tf.placeholder( "float32", shape=[], name="learning_rate" )

            #self.l2_regularizer_loss = tf.nn.l2_loss(self.W1)+tf.nn.l2_loss(self.W2)+ tf.nn.l2_loss(self.W2_action) + tf.nn.l2_loss(self.W3)+tf.nn.l2_loss(self.B1)+tf.nn.l2_loss(self.B2)+tf.nn.l2_loss(self.B3) 
            #self.l2_regularizer_loss = 0.0001*tf.reduce_mean( tf.square( self.W2 ) )
            with tf.name_scope("cost"):
                #self.cost           = tf.pow( self.Q_value - self.Q_obj, 2 )/AC_BATCH_SIZE + self.l2_regularizer_loss#/tf.to_float(tf.shape(self.Q_obj)[0])
                self.cost            = tf.reduce_mean( tf.square( self.Q_value - self.Q_obj ) )
            self.optimizer           = tf.train.AdamOptimizer( self.learning_rate ).minimize(self.cost)
            self.act_grad_v          = tf.gradients( self.Q_value, self.action )
            self.action_gradients    = [ self.act_grad_v[0]/tf.to_float( tf.shape(self.act_grad_v[0])[0] ) ] #this is just divided by batch size
            #from simple actor net:
            self.check_fl            = self.action_gradients             
            
            #initialize all tensor variable parameters:
            self.sess.run( tf.global_variables_initializer() )
            
            #To initialize critic and target with the same values:
            # copy target parameters
            self.sess.run([
                self.t_W1.assign( self.W1 ),
                self.t_B1.assign( self.B1 ),
                self.t_W2.assign( self.W2 ),
                self.t_W2_action.assign( self.W2_action ),
                self.t_B2.assign( self.B2 ),
                self.t_W3.assign( self.W3 ),
                self.t_B3.assign( self.B3 )
            ])
            
            self.update_target_critic_op = [
                self.t_W1.assign( TAU*self.W1 + (1-TAU)*self.t_W1 ),
                self.t_B1.assign( TAU*self.B1 + (1-TAU)*self.t_B1 ),  
                self.t_W2.assign( TAU*self.W2 + (1-TAU)*self.t_W2 ),
                self.t_W2_action.assign( TAU*self.W2_action + (1-TAU)*self.t_W2_action ),
                self.t_B2.assign( TAU*self.B2 + (1-TAU)*self.t_B2 ),
                self.t_W3.assign( TAU*self.W3 + (1-TAU)*self.t_W3 ),
                self.t_B3.assign( TAU*self.B3 + (1-TAU)*self.t_B3 ),
                self.t_BN1.updateTarget,
                self.t_BN2.updateTarget
            ]

            net_path   = "./model/cn"
            if self.write_sum > 0:
                self.saver = tf.train.Saver()
                self.saver.save(self.sess, net_path + "/net.ckpt")
                writer = tf.summary.FileWriter( net_path )
                writer.add_graph(self.sess.graph)
                writer.close()
                self.merged       = tf.summary.merge_all()
                self.train_writer = tf.summary.FileWriter( net_path, self.sess.graph )
                self.run_metadata = tf.RunMetadata()
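
During training, Q_obj receives the bootstrapped TD target y = r + gamma * Q'(s', a') computed from the target network. A hedged sketch of one critic update with the class above; `critic` (an instance), GAMMA, the learning rate, and the *_batch numpy arrays drawn from a replay buffer are all assumptions:

    GAMMA = 0.99   # assumed discount factor

    # TD target from the target critic network
    t_q = critic.sess.run(critic.t_Q_value,
                          feed_dict={critic.t_state: next_state_batch,
                                     critic.t_action: next_action_batch,
                                     critic.is_training: False})
    y = reward_batch.reshape(-1, 1) + GAMMA * (1.0 - done_batch.reshape(-1, 1)) * t_q

    # one gradient step on (Q(s, a) - y)^2 ...
    critic.sess.run(critic.optimizer,
                    feed_dict={critic.state: state_batch,
                               critic.action: action_batch,
                               critic.Q_obj: y,
                               critic.learning_rate: 1e-3,
                               critic.is_training: True})
    # ... then blend the online weights into the target network
    critic.sess.run(critic.update_target_critic_op)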