def residual_block3(l, base_dim, increase_dim=False, projection=False):
    if increase_dim:
        layer_1 = batch_norm(ConvLayer(l, num_filters=base_dim, filter_size=(1, 1), stride=(2, 2),
                                       nonlinearity=None, pad='same', W=nn.init.HeNormal(gain='relu')))
    else:
        layer_1 = batch_norm(ConvLayer(l, num_filters=base_dim, filter_size=(1, 1), stride=(1, 1),
                                       nonlinearity=rectify, pad='same', W=nn.init.HeNormal(gain='relu')))
    layer_2 = batch_norm(ConvLayer(layer_1, num_filters=base_dim, filter_size=(3, 3), stride=(1, 1),
                                   nonlinearity=rectify, pad='same', W=nn.init.HeNormal(gain='relu')))
    layer_3 = batch_norm(ConvLayer(layer_2, num_filters=4 * base_dim, filter_size=(1, 1), stride=(1, 1),
                                   nonlinearity=rectify, pad='same', W=nn.init.HeNormal(gain='relu')))

    # add shortcut connection
    if increase_dim:
        if projection:
            # projection shortcut (option B in paper)
            projection = batch_norm(ConvLayer(l, num_filters=4 * base_dim, filter_size=(1, 1), stride=(2, 2),
                                              nonlinearity=None, pad='same', b=None))
            block = NonlinearityLayer(ElemwiseSumLayer([layer_3, projection]), nonlinearity=rectify)
        else:
            # identity shortcut (option A in paper)
            # we use a pooling layer to get identity with strides,
            # since identity layers with stride don't exist in Lasagne
            identity = PoolLayer(l, pool_size=1, stride=(2, 2), mode='average_exc_pad')
            padding = PadLayer(identity, [4 * base_dim, 0, 0], batch_ndim=1)
            block = NonlinearityLayer(ElemwiseSumLayer([layer_3, padding]), nonlinearity=rectify)
    else:
        block = NonlinearityLayer(ElemwiseSumLayer([layer_3, l]), nonlinearity=rectify)
    return block
def conv_relu(X, kernel_shape, conv_std, bias_val, do_batch_norm, train_phase, add_summary=True):
    weights = tf.get_variable(
        "weights", kernel_shape,
        initializer=tf.contrib.layers.xavier_initializer())  # tf.random_normal_initializer(stddev=conv_std))
    biases = tf.get_variable("biases", kernel_shape[-1],
                             initializer=tf.constant_initializer(bias_val))
    if add_summary:
        with tf.variable_scope('weights'):
            PoseTools.variable_summaries(weights)
        # PoseTools.variable_summaries(biases)
    conv = tf.nn.conv2d(X, weights, strides=[1, 1, 1, 1], padding='SAME')
    if do_batch_norm:
        conv = batch_norm(conv, train_phase)
    with tf.variable_scope('conv'):
        PoseTools.variable_summaries(conv)
    return tf.nn.relu(conv - biases)
def nn_bn_layer(input_tensor, input_dim, output_dim, layer_name, act, is_training, sess, parForTarget=None):
    """Reusable code for making a simple neural net layer.

    It does a matrix multiply, applies batch normalization, and then the given
    nonlinearity. It also sets up name scoping so that the resultant graph is
    easy to read, and adds a number of summary ops.
    """
    # Adding a name scope ensures logical grouping of the layers in the graph.
    # input_dim = input_tensor.shape[0]
    with tf.name_scope(layer_name):
        # This Variable will hold the state of the weights for the layer
        with tf.name_scope('weights'):
            weights = weight_variable([input_dim, output_dim])
            variable_summaries(weights)
        with tf.name_scope('pre_bn'):
            PBN = tf.matmul(input_tensor, weights)
            variable_summaries(PBN)
        with tf.name_scope('pre_activation'):
            BN = batch_norm(PBN, output_dim, is_training, sess, parForTarget=parForTarget)
        with tf.name_scope('activation'):
            activations = act(BN.bnorm)
            variable_summaries(activations)
    layer_paras = [weights]
    return activations, layer_paras, [BN]
def conv_relu_norm_init(X, kernel_shape, conv_std, bias_val, do_batch_norm, train_phase, add_summary=True):
    weights = tf.get_variable("weights", kernel_shape,
                              initializer=tf.random_normal_initializer(stddev=conv_std))
    biases = tf.get_variable("biases", kernel_shape[-1],
                             initializer=tf.constant_initializer(bias_val))
    if add_summary:
        with tf.variable_scope('weights'):
            PoseTools.variable_summaries(weights)
        # PoseTools.variable_summaries(biases)
    conv = tf.nn.conv2d(X, weights, strides=[1, 1, 1, 1], padding='SAME')
    if do_batch_norm:
        conv = batch_norm(conv, train_phase)
    with tf.variable_scope('conv'):
        PoseTools.variable_summaries(conv)
    return tf.nn.relu(conv - biases)
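A minimal usage sketch (not from the source), assuming TF 1.x and that the `batch_norm` and `PoseTools` helpers used above are importable from the surrounding module. Because both functions create variables named "weights" and "biases" via tf.get_variable, each layer has to live in its own variable scope; the placeholder shape and layer sizes below are illustrative only.

import tensorflow as tf

# illustrative inputs: grayscale images and a boolean train/test switch
images = tf.placeholder(tf.float32, [None, 128, 128, 1], name='images')
train_phase = tf.placeholder(tf.bool, name='train_phase')

with tf.variable_scope('layer1'):
    out1 = conv_relu(images, kernel_shape=[5, 5, 1, 32], conv_std=0.01,
                     bias_val=0.0, do_batch_norm=True, train_phase=train_phase)
with tf.variable_scope('layer2'):
    out2 = conv_relu_norm_init(out1, kernel_shape=[3, 3, 32, 64], conv_std=0.01,
                               bias_val=0.0, do_batch_norm=True, train_phase=train_phase)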
is_training = tf.placeholder(tf.bool, name='is_training')
LUSV = tf.placeholder(tf.float32)
lr = tf.placeholder(tf.float32)

kernel = _variable_with_weight_decay('conv0', shape=[3, 3, 3, 16],
                                     stddev=np.sqrt(2.0 / 3 / 9), wd=weight_d)
trainWs.append(kernel)
orthoInit0 = kernel.assign(svd_orthonormal(kernel.get_shape().as_list()))
upd0 = kernel.assign(kernel / LUSV)
ConvLayer0 = tf.nn.conv2d(x, kernel, [1, 1, 1, 1], padding='SAME')
print(ConvLayer0.get_shape().as_list())
if ifbatchnorm:
    net = batch_norm(ConvLayer0, is_training, scope='conv0')
else:
    net = ConvLayer0  # assumed fallback so `net` is defined when batch norm is disabled
net = tf.nn.relu(net)
if visual:
    _activation_summary(net)

resLayer = []
orthoInit = []
kernel_upd = []
for block in range(3):
    blocks = []
    nfilters = 16 << block
    for layer in range(repeat_layer):
        layers = []
        net_copy = net
        for i in range(2):
            name = 'block_%d/layer_%d/conv%d' % (block, layer, i)
def __init__(self, num_states, num_actions):
    tf.reset_default_graph()
    self.g = tf.Graph()
    with self.g.as_default():
        self.sess = tf.InteractiveSession()

        # Critic Q Network:
        self.critic_state_in = tf.placeholder("float", [None, num_states])
        self.critic_action_in = tf.placeholder("float", [None, num_actions])
        self.W1_c = tf.Variable(tf.random_uniform(
            [num_states, N_HIDDEN_1], -1 / math.sqrt(num_states), 1 / math.sqrt(num_states)))
        self.B1_c = tf.Variable(tf.random_uniform(
            [N_HIDDEN_1], -1 / math.sqrt(num_states), 1 / math.sqrt(num_states)))
        self.W2_c = tf.Variable(tf.random_uniform(
            [N_HIDDEN_1, N_HIDDEN_2],
            -1 / math.sqrt(N_HIDDEN_1 + num_actions), 1 / math.sqrt(N_HIDDEN_1 + num_actions)))
        self.B2_c = tf.Variable(tf.random_uniform(
            [N_HIDDEN_2],
            -1 / math.sqrt(N_HIDDEN_1 + num_actions), 1 / math.sqrt(N_HIDDEN_1 + num_actions)))
        self.W2_action_c = tf.Variable(tf.random_uniform(
            [num_actions, N_HIDDEN_2],
            -1 / math.sqrt(N_HIDDEN_1 + num_actions), 1 / math.sqrt(N_HIDDEN_1 + num_actions)))
        self.W3_c = tf.Variable(tf.random_uniform([N_HIDDEN_2, 1], -0.003, 0.003))
        self.B3_c = tf.Variable(tf.random_uniform([1], -0.003, 0.003))

        self.is_training = tf.placeholder(tf.bool, [])
        self.H1_t = tf.matmul(self.critic_state_in, self.W1_c)
        self.H1_c_bn = batch_norm(self.H1_t, N_HIDDEN_1, self.is_training, self.sess)
        self.H1_c = tf.nn.softplus(self.H1_c_bn.bnorm) + self.B1_c

        self.H2_t = tf.matmul(self.H1_c, self.W2_c) + \
            tf.matmul(self.critic_action_in, self.W2_action_c)
        self.H2_c_bn = batch_norm(self.H2_t, N_HIDDEN_2, self.is_training, self.sess)
        self.H2_c = tf.nn.tanh(self.H2_c_bn.bnorm) + self.B2_c

        self.critic_q_model = tf.matmul(self.H2_c, self.W3_c) + self.B3_c

        # Target Critic Q Network:
        self.t_critic_state_in = tf.placeholder("float", [None, num_states])
        self.t_critic_action_in = tf.placeholder("float", [None, num_actions])
        self.t_W1_c = tf.Variable(tf.random_uniform(
            [num_states, N_HIDDEN_1], -1 / math.sqrt(num_states), 1 / math.sqrt(num_states)))
        self.t_B1_c = tf.Variable(tf.random_uniform(
            [N_HIDDEN_1], -1 / math.sqrt(num_states), 1 / math.sqrt(num_states)))
        self.t_W2_c = tf.Variable(tf.random_uniform(
            [N_HIDDEN_1, N_HIDDEN_2],
            -1 / math.sqrt(N_HIDDEN_1 + num_actions), 1 / math.sqrt(N_HIDDEN_1 + num_actions)))
        self.t_W2_action_c = tf.Variable(tf.random_uniform(
            [num_actions, N_HIDDEN_2],
            -1 / math.sqrt(N_HIDDEN_1 + num_actions), 1 / math.sqrt(N_HIDDEN_1 + num_actions)))
        self.t_B2_c = tf.Variable(tf.random_uniform(
            [N_HIDDEN_2],
            -1 / math.sqrt(N_HIDDEN_1 + num_actions), 1 / math.sqrt(N_HIDDEN_1 + num_actions)))
        self.t_W3_c = tf.Variable(tf.random_uniform([N_HIDDEN_2, 1], -0.003, 0.003))
        self.t_B3_c = tf.Variable(tf.random_uniform([1], -0.003, 0.003))

        self.t_H1_t = tf.matmul(self.t_critic_state_in, self.t_W1_c)
        self.t_H1_c_bn = batch_norm(self.t_H1_t, N_HIDDEN_1, self.is_training, self.sess, self.H1_c_bn)
        self.t_H1_c = tf.nn.softplus(self.t_H1_c_bn.bnorm) + self.t_B1_c

        self.t_H2_t = tf.matmul(self.t_H1_c, self.t_W2_c) + \
            tf.matmul(self.t_critic_action_in, self.t_W2_action_c)
        self.t_H2_c_bn = batch_norm(self.t_H2_t, N_HIDDEN_2, self.is_training, self.sess, self.H2_c_bn)
        self.t_H2_c = tf.nn.tanh(self.t_H2_c_bn.bnorm) + self.t_B2_c

        self.t_critic_q_model = tf.matmul(self.t_H2_c, self.t_W3_c) + self.t_B3_c

        self.q_value_in = tf.placeholder("float", [None, 1])  # supervisor
        # self.l2_regularizer_loss = tf.nn.l2_loss(self.W1_c) + tf.nn.l2_loss(self.W2_c) + tf.nn.l2_loss(self.W2_action_c) + tf.nn.l2_loss(self.W3_c) + tf.nn.l2_loss(self.B1_c) + tf.nn.l2_loss(self.B2_c) + tf.nn.l2_loss(self.B3_c)
        self.l2_regularizer_loss = 0.0001 * tf.reduce_sum(tf.pow(self.W2_c, 2))
        self.cost = tf.pow(self.critic_q_model - self.q_value_in, 2) / BATCH_SIZE + \
            self.l2_regularizer_loss  # /tf.to_float(tf.shape(self.q_value_in)[0])
        self.optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(self.cost)
        self.act_grad_v = tf.gradients(self.critic_q_model, self.critic_action_in)
        # this is just divided by batch size
        self.action_gradients = [self.act_grad_v[0] / tf.to_float(tf.shape(self.act_grad_v[0])[0])]
        # from simple actor net:
        self.check_fl = self.action_gradients

        # initialize all tensor variable parameters:
        self.sess.run(tf.initialize_all_variables())

        # To initialize critic and target with the same values:
        # copy target parameters
        self.sess.run([
            self.t_W1_c.assign(self.W1_c),
            self.t_B1_c.assign(self.B1_c),
            self.t_W2_c.assign(self.W2_c),
            self.t_W2_action_c.assign(self.W2_action_c),
            self.t_B2_c.assign(self.B2_c),
            self.t_W3_c.assign(self.W3_c),
            self.t_B3_c.assign(self.B3_c)
        ])
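A hedged sketch (not from the source) of how this critic would typically be driven during a DDPG update: one gradient step on the Q network, then the action gradients that get passed to the actor. The class name `CriticNetwork`, the dimensions, and the numpy batches are illustrative assumptions.

import numpy as np

critic = CriticNetwork(num_states=4, num_actions=1)   # class name assumed for illustration
state_batch = np.random.randn(64, 4).astype(np.float32)
action_batch = np.random.randn(64, 1).astype(np.float32)
target_q_batch = np.random.randn(64, 1).astype(np.float32)

# one Adam step on the critic; is_training=True so batch_norm uses batch statistics
critic.sess.run(critic.optimizer, feed_dict={
    critic.critic_state_in: state_batch,
    critic.critic_action_in: action_batch,
    critic.q_value_in: target_q_batch,
    critic.is_training: True})

# gradients of Q with respect to the action, to be fed into the actor network
dq_da = critic.sess.run(critic.action_gradients, feed_dict={
    critic.critic_state_in: state_batch,
    critic.critic_action_in: action_batch,
    critic.is_training: False})[0]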
def build_model(input_var):
    # every convolution below is a batch-normalised, 'same'-padded Conv2DLayer with the
    # same nonlinearity and initialisation; a local helper keeps the layer list readable
    def conv_bn(incoming, num_filters, filter_size, stride):
        return batch_norm(Conv2DLayer(incoming, num_filters=num_filters, filter_size=filter_size,
                                      stride=stride, pad='same', nonlinearity=LeakyRectify(leakiness),
                                      W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                                      untie_biases=True))

    layers = []
    input_layer = nn.layers.InputLayer(
        shape=(batch_size, num_channels, input_width, input_height),
        input_var=input_var,
        name='inputs'
    )
    layers.append(input_layer)

    conv_1 = conv_bn(layers[-1], 32, (7, 7), (2, 2))
    layers.append(conv_1)
    pool_1 = conv_bn(layers[-1], 32, (3, 3), (2, 2))  # MaxPool2DLayer(layers[-1], pool_size=(3, 3), stride=(2, 2))
    layers.append(pool_1)
    conv_2 = conv_bn(layers[-1], 32, (3, 3), (1, 1))
    layers.append(conv_2)
    conv_3 = conv_bn(layers[-1], 32, (3, 3), (1, 1))
    layers.append(conv_3)
    # pool_2 = MaxPool2DLayer(layers[-1], pool_size=(3, 3), stride=(2, 2))
    pool_2 = conv_bn(layers[-1], 32, (3, 3), (2, 2))
    layers.append(pool_2)
    conv_4 = conv_bn(layers[-1], 64, (3, 3), (1, 1))
    layers.append(conv_4)
    conv_5 = conv_bn(layers[-1], 64, (3, 3), (1, 1))
    layers.append(conv_5)
    # pool_3 = MaxPool2DLayer(layers[-1], pool_size=(3, 3), stride=(2, 2))
    pool_3 = conv_bn(layers[-1], 64, (3, 3), (2, 2))
    layers.append(pool_3)
    conv_6 = conv_bn(layers[-1], 128, (3, 3), (1, 1))
    layers.append(conv_6)
    conv_7 = conv_bn(layers[-1], 128, (3, 3), (1, 1))
    layers.append(conv_7)
    conv_8 = conv_bn(layers[-1], 128, (3, 3), (1, 1))
    layers.append(conv_8)
    conv_9 = conv_bn(layers[-1], 128, (3, 3), (1, 1))
    layers.append(conv_9)
    # pool_4 = MaxPool2DLayer(layers[-1], pool_size=(3, 3), stride=(2, 2))
    pool_4 = conv_bn(layers[-1], 128, (3, 3), (2, 2))
    layers.append(pool_4)
    conv_10 = conv_bn(layers[-1], 256, (3, 3), (1, 1))
    layers.append(conv_10)
    conv_11 = conv_bn(layers[-1], 256, (3, 3), (1, 1))
    layers.append(conv_11)
    conv_12 = conv_bn(layers[-1], 256, (3, 3), (1, 1))
    layers.append(conv_12)
    conv_13 = conv_bn(layers[-1], 256, (3, 3), (1, 1))
    layers.append(conv_13)
    # pool_5 = MaxPool2DLayer(layers[-1], pool_size=(3, 3), stride=(2, 2))
    pool_5 = conv_bn(layers[-1], 256, (3, 3), (2, 2))
    layers.append(pool_5)

    drop_1 = nn.layers.DropoutLayer(layers[-1], p=0.5)
    layers.append(drop_1)
    fc_1 = DenseLayer(layers[-1], num_units=1024, nonlinearity=None,
                      W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1))
    layers.append(fc_1)
    pool_6 = nn.layers.FeaturePoolLayer(layers[-1], pool_size=2, pool_function=T.max)
    layers.append(pool_6)
    # merge each pair of examples (the two eyes) into one feature vector
    merge_eyes = nn.layers.ReshapeLayer(layers[-1], shape=(batch_size // 2, -1))
    layers.append(merge_eyes)
    drop_2 = nn.layers.DropoutLayer(layers[-1], p=0.5)
    layers.append(drop_2)
    fc_2 = DenseLayer(layers[-1], num_units=1024, nonlinearity=None,
                      W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1))
    layers.append(fc_2)
    pool_7 = nn.layers.FeaturePoolLayer(layers[-1], pool_size=2, pool_function=T.max)
    layers.append(pool_7)
    drop_3 = nn.layers.DropoutLayer(layers[-1], p=0.5)
    layers.append(drop_3)
    fc_3 = DenseLayer(layers[-1], num_units=output_dim * 2, nonlinearity=None,
                      W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1))
    layers.append(fc_3)
    # split the merged pair back into per-eye predictions
    split_eyes = nn.layers.ReshapeLayer(layers[-1], shape=(batch_size, 5))
    layers.append(split_eyes)
    softmax_layer = nn.layers.NonlinearityLayer(layers[-1], nonlinearity=nn.nonlinearities.softmax)
    layers.append(softmax_layer)

    return input_layer, softmax_layer
def __init__(self, state_size, action_size, TAU=0.001, write_sum=0):
    tf.reset_default_graph()
    self.counter = 0
    self.write_sum = write_sum  # whether to write the summary file
    ntanh = lambda x, name=[]: (tf.nn.tanh(x) + 1) / 2
    self.activations = [tf.nn.softplus, tf.nn.relu, ntanh]
    self.g = tf.Graph()
    with self.g.as_default():
        self.sess = tf.InteractiveSession()

        # actor network model parameters:
        self.state = tf.placeholder("float32", [None, state_size], name="state")
        self.is_training = tf.placeholder(tf.bool, [], name="is_training")

        with tf.name_scope("Layer_1"):
            with tf.name_scope('weights'):
                self.W1 = tf.Variable(tf.random_uniform(
                    [state_size, N_HIDDEN_1], -1 / math.sqrt(state_size), 1 / math.sqrt(state_size)))
                DNN.variable_summaries(self.W1)
            with tf.name_scope('biases'):
                self.B1 = tf.Variable(tf.random_uniform(
                    [N_HIDDEN_1], -1 / math.sqrt(state_size), 1 / math.sqrt(state_size)))
                DNN.variable_summaries(self.B1)
            with tf.name_scope('pre_bn'):
                self.PBN1 = tf.matmul(self.state, self.W1)
                DNN.variable_summaries(self.PBN1)
            with tf.name_scope('pre_activation'):
                self.BN1 = batch_norm(self.PBN1, N_HIDDEN_1, self.is_training, self.sess)
                DNN.variable_summaries(self.BN1.bnorm)
            with tf.name_scope('activation'):
                self.A1 = self.activations[0](self.BN1.bnorm)  # + self.B1
                DNN.variable_summaries(self.A1)

        with tf.name_scope("Layer_2"):
            with tf.name_scope('weights'):
                self.W2 = tf.Variable(tf.random_uniform(
                    [N_HIDDEN_1, N_HIDDEN_2], -1 / math.sqrt(N_HIDDEN_1), 1 / math.sqrt(N_HIDDEN_1)))
                DNN.variable_summaries(self.W2)
            with tf.name_scope('biases'):
                self.B2 = tf.Variable(tf.random_uniform(
                    [N_HIDDEN_2], -1 / math.sqrt(N_HIDDEN_1), 1 / math.sqrt(N_HIDDEN_1)))
                DNN.variable_summaries(self.B2)
            with tf.name_scope('pre_bn'):
                self.PBN2 = tf.matmul(self.A1, self.W2)
                DNN.variable_summaries(self.PBN2)
            with tf.name_scope('pre_activation'):
                self.BN2 = batch_norm(self.PBN2, N_HIDDEN_2, self.is_training, self.sess)
                DNN.variable_summaries(self.BN2.bnorm)
            with tf.name_scope('activation'):
                self.A2 = self.activations[1](self.BN2.bnorm)  # + self.B2
                DNN.variable_summaries(self.A2)

        with tf.name_scope("Output_layer"):
            with tf.name_scope('weights'):
                self.W3 = tf.Variable(tf.random_uniform([N_HIDDEN_2, action_size], -0.003, 0.003))
                DNN.variable_summaries(self.W3)
            with tf.name_scope('biases'):
                self.B3 = tf.Variable(tf.random_uniform([action_size], -0.003, 0.003))
                DNN.variable_summaries(self.B3)
            with tf.name_scope('activation'):
                self.action = self.activations[2](tf.matmul(self.A2, self.W3) + self.B3)
                DNN.variable_summaries(self.action)

        # target actor network model parameters:
        self.t_state = tf.placeholder("float32", [None, state_size], name="t_state")

        with tf.name_scope("T_Layer_1"):
            with tf.name_scope('weights'):
                self.t_W1 = tf.Variable(tf.random_uniform(
                    [state_size, N_HIDDEN_1], -1 / math.sqrt(state_size), 1 / math.sqrt(state_size)))
            with tf.name_scope('biases'):
                self.t_B1 = tf.Variable(tf.random_uniform(
                    [N_HIDDEN_1], -1 / math.sqrt(state_size), 1 / math.sqrt(state_size)))
            with tf.name_scope('pre_bn'):
                self.t_PBN1 = tf.matmul(self.t_state, self.t_W1)
            with tf.name_scope('pre_activation'):
                self.t_BN1 = batch_norm(self.t_PBN1, N_HIDDEN_1, self.is_training, self.sess, self.BN1)
            with tf.name_scope('activation'):
                self.t_A1 = self.activations[0](self.t_BN1.bnorm)  # + self.t_B1

        with tf.name_scope("T_Layer_2"):
            with tf.name_scope('weights'):
                self.t_W2 = tf.Variable(tf.random_uniform(
                    [N_HIDDEN_1, N_HIDDEN_2], -1 / math.sqrt(N_HIDDEN_1), 1 / math.sqrt(N_HIDDEN_1)))
            with tf.name_scope('biases'):
                self.t_B2 = tf.Variable(tf.random_uniform(
                    [N_HIDDEN_2], -1 / math.sqrt(N_HIDDEN_1), 1 / math.sqrt(N_HIDDEN_1)))
            with tf.name_scope('pre_bn'):
                self.t_PBN2 = tf.matmul(self.t_A1, self.t_W2)
            with tf.name_scope('pre_activation'):
                self.t_BN2 = batch_norm(self.t_PBN2, N_HIDDEN_2, self.is_training, self.sess, self.BN2)
            with tf.name_scope('activation'):
                self.t_A2 = self.activations[1](self.t_BN2.bnorm)  # + self.t_B2

        with tf.name_scope("T_Output_layer"):
            with tf.name_scope('weights'):
                self.t_W3 = tf.Variable(tf.random_uniform([N_HIDDEN_2, action_size], -0.003, 0.003))
            with tf.name_scope('biases'):
                self.t_B3 = tf.Variable(tf.random_uniform([action_size], -0.003, 0.003))
            with tf.name_scope('activation'):
                self.t_action = self.activations[2](tf.matmul(self.t_A2, self.t_W3) + self.t_B3)

        self.learning_rate = tf.placeholder("float32", shape=[], name="learning_rate")
        self.obj_action = tf.placeholder("float32", [None, action_size], name="obj_action")
        # gets input from action_gradient computed in critic network file
        self.q_gradient_input = tf.placeholder("float32", [None, action_size], name="q_gradient_input")

        # cost of actor network:
        with tf.name_scope('cost'):
            self.cost = tf.reduce_mean(tf.square(tf.round(self.action) - self.obj_action))

        self.actor_parameters = [
            self.W1, self.B1, self.W2, self.B2, self.W3, self.B3,
            self.BN1.scale, self.BN1.beta, self.BN2.scale, self.BN2.beta
        ]
        # /BATCH_SIZE) changed -self.q_gradient to -
        self.parameters_gradients = tf.gradients(self.action, self.actor_parameters,
                                                 -self.q_gradient_input)
        self.optimizer = tf.train.AdamOptimizer(self.learning_rate).apply_gradients(
            zip(self.parameters_gradients, self.actor_parameters))

        # initialize all tensor variable parameters:
        self.sess.run(tf.global_variables_initializer())

        # To make sure actor and target have the same initial parameters, copy the parameters:
        # copy target parameters
        self.sess.run([
            self.t_W1.assign(self.W1),
            self.t_B1.assign(self.B1),
            self.t_W2.assign(self.W2),
            self.t_B2.assign(self.B2),
            self.t_W3.assign(self.W3),
            self.t_B3.assign(self.B3)
        ])

        self.update_target_actor_op = [
            self.t_W1.assign(TAU * self.W1 + (1 - TAU) * self.t_W1),
            self.t_B1.assign(TAU * self.B1 + (1 - TAU) * self.t_B1),
            self.t_W2.assign(TAU * self.W2 + (1 - TAU) * self.t_W2),
            self.t_B2.assign(TAU * self.B2 + (1 - TAU) * self.t_B2),
            self.t_W3.assign(TAU * self.W3 + (1 - TAU) * self.t_W3),
            self.t_B3.assign(TAU * self.B3 + (1 - TAU) * self.t_B3),
            self.t_BN1.updateTarget,
            self.t_BN2.updateTarget
        ]

        net_path = "./model/an"
        self.saver = tf.train.Saver()
        self.saver.save(self.sess, net_path + "/net.ckpt")
        writer = tf.summary.FileWriter(net_path)
        writer.add_graph(self.sess.graph)
        writer.close()
        self.merged = tf.summary.merge_all()
        self.train_writer = tf.summary.FileWriter(net_path, self.sess.graph)
        self.run_metadata = tf.RunMetadata()
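A hedged sketch (not from the source) of one actor update with this class: the critic's dQ/da is fed into `q_gradient_input`, then the target actor is soft-updated. The class name `ActorNet` and all shapes are illustrative assumptions.

import numpy as np

actor = ActorNet(state_size=3, action_size=1)      # class name assumed for illustration

states = np.random.randn(64, 3).astype(np.float32)
dq_da = np.random.randn(64, 1).astype(np.float32)  # would come from the critic's action_gradients

# ascend dQ/da through the actor (the gradients are negated inside tf.gradients above)
actor.sess.run(actor.optimizer, feed_dict={
    actor.state: states,
    actor.q_gradient_input: dq_da,
    actor.learning_rate: 1e-4,
    actor.is_training: True})

# soft-update the target actor weights and batch-norm parameters
actor.sess.run(actor.update_target_actor_op)

# action for a single state at inference time (batch norm uses its running averages)
a = actor.sess.run(actor.action, feed_dict={actor.state: states[:1], actor.is_training: False})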
def build_model(input_var):
    # Three-layer (bottleneck) residual block
    def residual_block3(l, base_dim, increase_dim=False, projection=False):
        if increase_dim:
            layer_1 = batch_norm(ConvLayer(l, num_filters=base_dim, filter_size=(1, 1), stride=(2, 2),
                                           nonlinearity=None, pad='same', W=nn.init.HeNormal(gain='relu')))
        else:
            layer_1 = batch_norm(ConvLayer(l, num_filters=base_dim, filter_size=(1, 1), stride=(1, 1),
                                           nonlinearity=rectify, pad='same', W=nn.init.HeNormal(gain='relu')))
        layer_2 = batch_norm(ConvLayer(layer_1, num_filters=base_dim, filter_size=(3, 3), stride=(1, 1),
                                       nonlinearity=rectify, pad='same', W=nn.init.HeNormal(gain='relu')))
        layer_3 = batch_norm(ConvLayer(layer_2, num_filters=4 * base_dim, filter_size=(1, 1), stride=(1, 1),
                                       nonlinearity=rectify, pad='same', W=nn.init.HeNormal(gain='relu')))

        # add shortcut connection
        if increase_dim:
            if projection:
                # projection shortcut (option B in paper)
                projection = batch_norm(ConvLayer(l, num_filters=4 * base_dim, filter_size=(1, 1), stride=(2, 2),
                                                  nonlinearity=None, pad='same', b=None))
                block = NonlinearityLayer(ElemwiseSumLayer([layer_3, projection]), nonlinearity=rectify)
            else:
                # identity shortcut (option A in paper)
                # we use a pooling layer to get identity with strides,
                # since identity layers with stride don't exist in Lasagne
                identity = PoolLayer(l, pool_size=1, stride=(2, 2), mode='average_exc_pad')
                padding = PadLayer(identity, [4 * base_dim, 0, 0], batch_ndim=1)
                block = NonlinearityLayer(ElemwiseSumLayer([layer_3, padding]), nonlinearity=rectify)
        else:
            block = NonlinearityLayer(ElemwiseSumLayer([layer_3, l]), nonlinearity=rectify)
        return block

    # Input of the network
    input_layer = InputLayer(shape=(batch_size, num_channels, input_height, input_width),
                             input_var=input_var)

    # Very first conv layer
    l = batch_norm(ConvLayer(input_layer, num_filters=64, filter_size=(7, 7), stride=(2, 2),
                             nonlinearity=rectify, pad='same', W=nn.init.HeNormal(gain='relu')))
    # Maxpool layer
    l = MaxPoolLayer(l, pool_size=(3, 3), stride=(2, 2))
    # Convolve with a 1x1 filter to match the input dimension of the upcoming residual block
    l = batch_norm(ConvLayer(l, num_filters=256, filter_size=(1, 1), stride=(1, 1),
                             nonlinearity=rectify, pad='same', W=nn.init.HeNormal(gain='relu')))

    ############# First residual blocks #############
    for _ in range(num_blocks[0] - 1):
        l = residual_block3(l, base_dim=64)

    ############# Second residual blocks ############
    # Increment dimension
    l = residual_block3(l, base_dim=128, increase_dim=True, projection=True)
    for _ in range(num_blocks[1] - 1):
        l = residual_block3(l, base_dim=128)

    ############# Third residual blocks #############
    # Increment dimension
    l = residual_block3(l, base_dim=256, increase_dim=True, projection=True)
    for _ in range(num_blocks[2] - 1):
        l = residual_block3(l, base_dim=256)

    ############# Fourth residual blocks #############
    # Increment dimension
    l = residual_block3(l, base_dim=512, increase_dim=True, projection=True)
    for _ in range(num_blocks[2] - 1):
        l = residual_block3(l, base_dim=512)

    # Global pooling layer
    l = GlobalPoolLayer(l)

    # Softmax layer
    softmax_layer = DenseLayer(l, num_units=output_dim, W=nn.init.HeNormal(), nonlinearity=softmax)

    return softmax_layer
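A hedged sketch (not from the source) of compiling the network returned by build_model into Theano training and prediction functions; the symbolic variables, update rule, and hyperparameters are illustrative assumptions, and the globals used above (batch_size, num_channels, input_height, input_width, num_blocks, output_dim) are expected to be defined as in the snippet.

import theano
import theano.tensor as T
import lasagne as nn

input_var = T.tensor4('inputs')      # (batch, channels, height, width)
target_var = T.ivector('targets')    # integer class labels

network = build_model(input_var)     # the final softmax DenseLayer

# training-mode output: batch norm uses batch statistics, dropout is active
train_out = nn.layers.get_output(network, deterministic=False)
loss = nn.objectives.categorical_crossentropy(train_out, target_var).mean()

params = nn.layers.get_all_params(network, trainable=True)
updates = nn.updates.nesterov_momentum(loss, params, learning_rate=0.01, momentum=0.9)
train_fn = theano.function([input_var, target_var], loss, updates=updates)

# inference-mode output: batch norm uses its learned running averages
test_out = nn.layers.get_output(network, deterministic=True)
predict_fn = theano.function([input_var], T.argmax(test_out, axis=1))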
def build_model(input_var):
    # every convolution below is a batch-normalised, 'same'-padded Conv2DLayer with the
    # same nonlinearity and initialisation; a local helper keeps the layer list readable
    def conv_bn(incoming, num_filters, filter_size, stride):
        return batch_norm(Conv2DLayer(incoming, num_filters=num_filters, filter_size=filter_size,
                                      stride=stride, pad='same', nonlinearity=LeakyRectify(leakiness),
                                      W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1),
                                      untie_biases=True))

    layers = []
    input_layer = nn.layers.InputLayer(
        shape=(None, num_channels, input_width, input_height),
        input_var=input_var,
        name='inputs'
    )
    layers.append(input_layer)

    conv_1 = conv_bn(layers[-1], 32, (7, 7), (2, 2))
    layers.append(conv_1)
    pool_1 = conv_bn(layers[-1], 32, (3, 3), (2, 2))  # MaxPool2DLayer(layers[-1], pool_size=(3, 3), stride=(2, 2))
    layers.append(pool_1)
    conv_2 = conv_bn(layers[-1], 32, (3, 3), (1, 1))
    layers.append(conv_2)
    conv_3 = conv_bn(layers[-1], 32, (3, 3), (1, 1))
    layers.append(conv_3)
    # pool_2 = MaxPool2DLayer(layers[-1], pool_size=(3, 3), stride=(2, 2))
    pool_2 = conv_bn(layers[-1], 32, (3, 3), (2, 2))
    layers.append(pool_2)
    conv_4 = conv_bn(layers[-1], 64, (3, 3), (1, 1))
    layers.append(conv_4)
    conv_5 = conv_bn(layers[-1], 64, (3, 3), (1, 1))
    layers.append(conv_5)
    # pool_3 = MaxPool2DLayer(layers[-1], pool_size=(3, 3), stride=(2, 2))
    pool_3 = conv_bn(layers[-1], 64, (3, 3), (2, 2))
    layers.append(pool_3)
    conv_6 = conv_bn(layers[-1], 128, (3, 3), (1, 1))
    layers.append(conv_6)
    conv_7 = conv_bn(layers[-1], 128, (3, 3), (1, 1))
    layers.append(conv_7)
    conv_8 = conv_bn(layers[-1], 128, (3, 3), (1, 1))
    layers.append(conv_8)
    conv_9 = conv_bn(layers[-1], 128, (3, 3), (1, 1))
    layers.append(conv_9)
    # pool_4 = MaxPool2DLayer(layers[-1], pool_size=(3, 3), stride=(2, 2))
    pool_4 = conv_bn(layers[-1], 128, (3, 3), (2, 2))
    layers.append(pool_4)
    conv_10 = conv_bn(layers[-1], 256, (3, 3), (1, 1))
    layers.append(conv_10)
    conv_11 = conv_bn(layers[-1], 256, (3, 3), (1, 1))
    layers.append(conv_11)
    conv_12 = conv_bn(layers[-1], 256, (3, 3), (1, 1))
    layers.append(conv_12)
    conv_13 = conv_bn(layers[-1], 256, (3, 3), (1, 1))
    layers.append(conv_13)
    # pool_5 = MaxPool2DLayer(layers[-1], pool_size=(3, 3), stride=(2, 2))
    pool_5 = conv_bn(layers[-1], 256, (3, 3), (2, 2))
    layers.append(pool_5)

    drop_1 = nn.layers.DropoutLayer(layers[-1], p=0.5)
    layers.append(drop_1)
    fc_1 = DenseLayer(layers[-1], num_units=2048, nonlinearity=LeakyRectify(leakiness),
                      W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1))
    layers.append(fc_1)
    pool_6 = nn.layers.FeaturePoolLayer(layers[-1], pool_size=2, pool_function=T.max)
    layers.append(pool_6)
    drop_2 = nn.layers.DropoutLayer(layers[-1], p=0.5)
    layers.append(drop_2)
    fc_2 = DenseLayer(layers[-1], num_units=1024, nonlinearity=LeakyRectify(leakiness),
                      W=nn.init.Orthogonal(1.0), b=nn.init.Constant(0.1))
    layers.append(fc_2)
    pool_7 = nn.layers.FeaturePoolLayer(layers[-1], pool_size=2, pool_function=T.max)
    layers.append(pool_7)
    drop_3 = nn.layers.DropoutLayer(layers[-1], p=0.5)
    layers.append(drop_3)

    # two output heads on the shared features: class probabilities and a scalar regression
    softmax_layer = DenseLayer(drop_3, num_units=output_dim, nonlinearity=softmax)
    layers.append(softmax_layer)
    regression_layer = DenseLayer(drop_3, num_units=1, nonlinearity=identity)
    layers.append(regression_layer)
    output = nn.layers.merge.ConcatLayer([softmax_layer, regression_layer])
    layers.append(output)

    return input_layer, output
def __init__(self, state_size, action_size, TAU=0.001, write_sum=0, net_size_scale=1):
    self.counter = 0
    self.state_size = state_size
    self.action_size = action_size
    self.output_size = 1
    self.write_sum = write_sum
    self.input_size = state_size + action_size
    self.hidden_dims = [int(n * net_size_scale) for n in CN_N_HIDDENS]
    self.activations = CN_ACTS
    # print("n hiddens: " + str(self.hidden_dims))
    tf.reset_default_graph()
    self.g = tf.Graph()
    with self.g.as_default():
        self.sess = tf.InteractiveSession()

        # Critic Q Network:
        self.state = tf.placeholder("float32", [None, state_size], name="state")
        self.action = tf.placeholder("float32", [None, action_size], name="action")
        self.t_state = tf.placeholder("float32", [None, state_size], name="t_state")
        self.t_action = tf.placeholder("float32", [None, action_size], name="t_action")
        self.is_training = tf.placeholder(tf.bool, [], name="is_training")

        # Critic network
        self.A1, self.para1, self.bn_para1 = DNN.nn_bn_layer(
            self.state, self.state_size, self.hidden_dims[0], "Layer_1",
            self.activations[0], self.is_training, self.sess)

        with tf.name_scope("Layer_2"):
            with tf.name_scope('weights'):
                self.W2 = tf.Variable(tf.random_uniform(
                    [self.hidden_dims[0], self.hidden_dims[1]],
                    -1 / math.sqrt(self.hidden_dims[0] + action_size),
                    1 / math.sqrt(self.hidden_dims[0] + action_size)))
                DNN.variable_summaries(self.W2)
            with tf.name_scope('biases'):
                self.B2 = tf.Variable(tf.random_uniform(
                    [self.hidden_dims[1]],
                    -1 / math.sqrt(self.hidden_dims[0] + action_size),
                    1 / math.sqrt(self.hidden_dims[0] + action_size)))
                DNN.variable_summaries(self.B2)
            with tf.name_scope('action_weights'):
                self.W2_action = tf.Variable(tf.random_uniform(
                    [action_size, self.hidden_dims[1]],
                    -1 / math.sqrt(self.hidden_dims[0] + action_size),
                    1 / math.sqrt(self.hidden_dims[0] + action_size)))
                DNN.variable_summaries(self.W2_action)
            with tf.name_scope('pre_bn'):
                self.PBN2 = tf.matmul(self.A1, self.W2) + tf.matmul(self.action, self.W2_action)
                DNN.variable_summaries(self.PBN2)
            with tf.name_scope('pre_activation'):
                self.BN2 = batch_norm(self.PBN2, self.hidden_dims[1], self.is_training, self.sess)
                DNN.variable_summaries(self.BN2.bnorm)
            with tf.name_scope('activation'):
                self.A2 = self.activations[1](self.BN2.bnorm)  # + self.B2
                DNN.variable_summaries(self.A2)

        self.Q_value, self.out_paras = DNN.nn_layer(
            self.A2, self.hidden_dims[1], self.output_size, "Output", self.activations[2])

        self.parameters = [self.para1[0], self.W2, self.W2_action]
        self.parameters.extend(self.out_paras)
        self.bn_paras = [self.bn_para1[0], self.BN2]

        # Target critic network
        self.t_A1, self.t_para1, self.t_bn_para1 = DNN.nn_bn_layer(
            self.t_state, self.state_size, self.hidden_dims[0], "T_Layer_1",
            self.activations[0], self.is_training, self.sess, self.bn_paras[0])

        with tf.name_scope("T_Layer_2"):
            with tf.name_scope('weights'):
                self.t_W2 = tf.Variable(tf.random_uniform(
                    [self.hidden_dims[0], self.hidden_dims[1]],
                    -1 / math.sqrt(self.hidden_dims[0] + action_size),
                    1 / math.sqrt(self.hidden_dims[0] + action_size)))
                DNN.variable_summaries(self.t_W2)
            with tf.name_scope('biases'):
                self.t_B2 = tf.Variable(tf.random_uniform(
                    [self.hidden_dims[1]],
                    -1 / math.sqrt(self.hidden_dims[0] + action_size),
                    1 / math.sqrt(self.hidden_dims[0] + action_size)))
                DNN.variable_summaries(self.t_B2)
            with tf.name_scope('action_weights'):
                self.t_W2_action = tf.Variable(tf.random_uniform(
                    [action_size, self.hidden_dims[1]],
                    -1 / math.sqrt(self.hidden_dims[0] + action_size),
                    1 / math.sqrt(self.hidden_dims[0] + action_size)))
                DNN.variable_summaries(self.t_W2_action)
            with tf.name_scope('pre_bn'):
                self.t_PBN2 = tf.matmul(self.t_A1, self.t_W2) + tf.matmul(self.t_action, self.t_W2_action)
                DNN.variable_summaries(self.t_PBN2)
            with tf.name_scope('pre_activation'):
                self.t_BN2 = batch_norm(self.t_PBN2, self.hidden_dims[1],
                                        self.is_training, self.sess, self.bn_paras[1])
                DNN.variable_summaries(self.t_BN2.bnorm)
            with tf.name_scope('activation'):
                self.t_A2 = self.activations[1](self.t_BN2.bnorm)  # + self.B2
                DNN.variable_summaries(self.t_A2)

        self.t_Q_value, self.t_out_paras = DNN.nn_layer(
            self.t_A2, self.hidden_dims[1], self.output_size, "T_Output", self.activations[2])

        self.t_parameters = [self.t_para1[0], self.t_W2, self.t_W2_action]
        self.t_parameters.extend(self.t_out_paras)
        self.t_bn_paras = [self.t_bn_para1[0], self.t_BN2]

        self.parameters_name = ["W1", "W2", "W2_action", "W3", "B3"]
        self.bn_parameters_name = ["BN1", "BN2"]
        self.num_paras = len(self.parameters)
        self.num_bn = len(self.bn_paras)

        self.Q_obj = tf.placeholder("float32", [None, 1], name="obj_Q")  # supervisor
        self.learning_rate = tf.placeholder("float32", shape=[], name="learning_rate")

        with tf.name_scope("cost"):
            # self.l2_regularizer_loss = tf.nn.l2_loss(self.W1_state) + tf.nn.l2_loss(self.W2) + tf.nn.l2_loss(self.W2_action) + tf.nn.l2_loss(self.W3) + tf.nn.l2_loss(self.B1) + tf.nn.l2_loss(self.B2) + tf.nn.l2_loss(self.B3)
            # self.l2_regularizer_loss = 0.0001*tf.reduce_mean( tf.square( self.W2 ) )
            # self.cost = tf.pow( self.Q_value - self.Q_obj, 2 )/AC_BATCH_SIZE + self.l2_regularizer_loss  # /tf.to_float(tf.shape(self.Q_obj)[0])
            self.cost = tf.reduce_mean(tf.square(self.Q_value - self.Q_obj))
        with tf.name_scope("Grad_min_cost"):
            self.optimizer = tf.train.AdamOptimizer(
                self.learning_rate, name="critic_grad").minimize(self.cost, name="min_cost")

        # action gradient to be used in actor network:
        with tf.name_scope("Grad_Q2a"):
            self.act_grad_v = tf.gradients(self.Q_value, self.action, name="Grad_to_action")
            # this is just divided by batch size
            self.action_gradients = [self.act_grad_v[0] / tf.to_float(tf.shape(self.act_grad_v[0])[0])]

        # initialize all tensor variable parameters:
        self.sess.run(tf.global_variables_initializer())

        # To initialize critic and target with the same values: copy target parameters
        self.init_target_op = []
        # Operations to update target network, including BN parameters
        self.update_target_critic_op = []
        for i in range(self.num_paras):
            self.init_target_op.append(self.t_parameters[i].assign(self.parameters[i]))
            self.update_target_critic_op.append(self.t_parameters[i].assign(
                TAU * self.parameters[i] + (1 - TAU) * self.t_parameters[i]))
        for i in range(self.num_bn):
            self.update_target_critic_op.append(self.t_bn_paras[i].updateTarget)
        self.sess.run(self.init_target_op)

        # operations to be evaluated during training, including apply gradient
        # and update statistics in BN layers
        self.train_op = [self.optimizer]
        for i in range(self.num_bn):
            self.train_op.append(self.bn_paras[i].train_mean)
            self.train_op.append(self.bn_paras[i].train_var)
            self.train_op.append(self.t_bn_paras[i].train_mean)
            self.train_op.append(self.t_bn_paras[i].train_var)

        net_path = "./model/cn"
        if self.write_sum > 0:
            self.saver = tf.train.Saver()
            self.saver.save(self.sess, net_path + "/net.ckpt")
            writer = tf.summary.FileWriter(net_path)
            writer.add_graph(self.sess.graph)
            writer.close()
            self.merged = tf.summary.merge_all()
            self.train_writer = tf.summary.FileWriter(net_path, self.sess.graph)
            self.run_metadata = tf.RunMetadata()
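A hedged sketch (not from the source) of running the bundled `train_op` above so the Adam step and the batch-norm moving-average updates happen together, followed by the soft target update. The class name `CriticNet`, the shapes, and the learning rate are illustrative assumptions.

import numpy as np

critic = CriticNet(state_size=3, action_size=1)   # class name assumed for illustration

states = np.random.randn(64, 3).astype(np.float32)
actions = np.random.randn(64, 1).astype(np.float32)
q_targets = np.random.randn(64, 1).astype(np.float32)

# train_op applies the Adam update and refreshes the BN running mean/variance
critic.sess.run(critic.train_op, feed_dict={
    critic.state: states,
    critic.action: actions,
    critic.t_state: states,        # target inputs are fed as well, since train_op also
    critic.t_action: actions,      # updates the target network's BN statistics
    critic.Q_obj: q_targets,
    critic.learning_rate: 1e-3,
    critic.is_training: True})

# soft-update target weights and copy the BN parameters
critic.sess.run(critic.update_target_critic_op)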
def __init__(self, num_states, num_actions):
    tf.reset_default_graph()
    self.g = tf.Graph()
    with self.g.as_default():
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)

        # actor network model parameters:
        self.actor_state_in = tf.placeholder("float", [None, num_states])
        self.W1_a = tf.Variable(tf.random_uniform(
            [num_states, N_HIDDEN_1], -1 / math.sqrt(num_states), 1 / math.sqrt(num_states)))
        self.B1_a = tf.Variable(tf.random_uniform(
            [N_HIDDEN_1], -1 / math.sqrt(num_states), 1 / math.sqrt(num_states)))
        self.W2_a = tf.Variable(tf.random_uniform(
            [N_HIDDEN_1, N_HIDDEN_2], -1 / math.sqrt(N_HIDDEN_1), 1 / math.sqrt(N_HIDDEN_1)))
        self.B2_a = tf.Variable(tf.random_uniform(
            [N_HIDDEN_2], -1 / math.sqrt(N_HIDDEN_1), 1 / math.sqrt(N_HIDDEN_1)))
        self.W3_a = tf.Variable(tf.random_uniform([N_HIDDEN_2, num_actions], -0.003, 0.003))
        self.B3_a = tf.Variable(tf.random_uniform([num_actions], -0.003, 0.003))

        self.is_training = tf.placeholder(tf.bool, [])
        self.H1_t = tf.matmul(self.actor_state_in, self.W1_a)
        self.H1_a_bn = batch_norm(self.H1_t, N_HIDDEN_1, self.is_training, self.sess)
        self.H1_a = tf.nn.softplus(self.H1_a_bn.bnorm) + self.B1_a

        self.H2_t = tf.matmul(self.H1_a, self.W2_a)
        self.H2_a_bn = batch_norm(self.H2_t, N_HIDDEN_2, self.is_training, self.sess)
        self.H2_a = tf.nn.tanh(self.H2_a_bn.bnorm) + self.B2_a

        self.actor_model = tf.matmul(self.H2_a, self.W3_a) + self.B3_a

        # target actor network model parameters:
        self.t_actor_state_in = tf.placeholder("float", [None, num_states])
        self.t_W1_a = tf.Variable(tf.random_uniform(
            [num_states, N_HIDDEN_1], -1 / math.sqrt(num_states), 1 / math.sqrt(num_states)))
        self.t_B1_a = tf.Variable(tf.random_uniform(
            [N_HIDDEN_1], -1 / math.sqrt(num_states), 1 / math.sqrt(num_states)))
        self.t_W2_a = tf.Variable(tf.random_uniform(
            [N_HIDDEN_1, N_HIDDEN_2], -1 / math.sqrt(N_HIDDEN_1), 1 / math.sqrt(N_HIDDEN_1)))
        self.t_B2_a = tf.Variable(tf.random_uniform(
            [N_HIDDEN_2], -1 / math.sqrt(N_HIDDEN_1), 1 / math.sqrt(N_HIDDEN_1)))
        self.t_W3_a = tf.Variable(tf.random_uniform([N_HIDDEN_2, num_actions], -0.003, 0.003))
        self.t_B3_a = tf.Variable(tf.random_uniform([num_actions], -0.003, 0.003))

        self.t_is_training = tf.placeholder(tf.bool, [])
        self.t_H1_t = tf.matmul(self.t_actor_state_in, self.t_W1_a)
        self.t_H1_a_bn = batch_norm(self.t_H1_t, N_HIDDEN_1, self.t_is_training, self.sess, self.H1_a_bn)
        self.t_H1_a = tf.nn.softplus(self.t_H1_a_bn.bnorm) + self.t_B1_a

        self.t_H2_t = tf.matmul(self.t_H1_a, self.t_W2_a)
        self.t_H2_a_bn = batch_norm(self.t_H2_t, N_HIDDEN_2, self.t_is_training, self.sess, self.H2_a_bn)
        self.t_H2_a = tf.nn.tanh(self.t_H2_a_bn.bnorm) + self.t_B2_a

        self.t_actor_model = tf.matmul(self.t_H2_a, self.t_W3_a) + self.t_B3_a

        # cost of actor network:
        # gets input from action_gradient computed in critic network file
        self.q_gradient_input = tf.placeholder("float", [None, num_actions])
        self.actor_parameters = [
            self.W1_a, self.B1_a, self.W2_a, self.B2_a, self.W3_a, self.B3_a,
            self.H1_a_bn.scale, self.H1_a_bn.beta, self.H2_a_bn.scale, self.H2_a_bn.beta
        ]
        # /BATCH_SIZE) changed -self.q_gradient to -
        self.parameters_gradients = tf.gradients(self.actor_model, self.actor_parameters,
                                                 -self.q_gradient_input)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE, epsilon=1e-08).apply_gradients(
            zip(self.parameters_gradients, self.actor_parameters))

        # initialize all tensor variable parameters:
        self.sess.run(tf.initialize_all_variables())

        # To make sure actor and target have the same initial parameters, copy the parameters:
        # copy target parameters
        self.sess.run([
            self.t_W1_a.assign(self.W1_a),
            self.t_B1_a.assign(self.B1_a),
            self.t_W2_a.assign(self.W2_a),
            self.t_B2_a.assign(self.B2_a),
            self.t_W3_a.assign(self.W3_a),
            self.t_B3_a.assign(self.B3_a)
        ])
def __init__(self, state_size, action_size, TAU=0.001, write_sum=0):
    self.counter = 0
    self.write_sum = write_sum
    self.activations = [tf.nn.softplus, tf.nn.relu, lambda x, name=[]: x]
    tf.reset_default_graph()
    self.g = tf.Graph()
    with self.g.as_default():
        self.sess = tf.InteractiveSession()

        # Critic Q Network:
        self.state = tf.placeholder("float32", [None, state_size], name="state")
        self.action = tf.placeholder("float32", [None, action_size], name="action")
        self.is_training = tf.placeholder(tf.bool, [], name="is_training")

        with tf.name_scope("Layer_1"):
            with tf.name_scope('weights'):
                self.W1 = tf.Variable(tf.random_uniform(
                    [state_size, N_HIDDEN_1], -1 / math.sqrt(state_size), 1 / math.sqrt(state_size)))
                DNN.variable_summaries(self.W1)
            with tf.name_scope('biases'):
                self.B1 = tf.Variable(tf.random_uniform(
                    [N_HIDDEN_1], -1 / math.sqrt(state_size), 1 / math.sqrt(state_size)))
                DNN.variable_summaries(self.B1)
            with tf.name_scope('pre_bn'):
                self.PBN1 = tf.matmul(self.state, self.W1)
                DNN.variable_summaries(self.PBN1)
            with tf.name_scope('pre_activation'):
                self.BN1 = batch_norm(self.PBN1, N_HIDDEN_1, self.is_training, self.sess)
                DNN.variable_summaries(self.BN1.bnorm)
            with tf.name_scope('activation'):
                self.A1 = self.activations[0](self.BN1.bnorm)  # + self.B1
                DNN.variable_summaries(self.A1)

        with tf.name_scope("Layer_2"):
            with tf.name_scope('weights'):
                self.W2 = tf.Variable(tf.random_uniform(
                    [N_HIDDEN_1, N_HIDDEN_2],
                    -1 / math.sqrt(N_HIDDEN_1 + action_size), 1 / math.sqrt(N_HIDDEN_1 + action_size)))
                DNN.variable_summaries(self.W2)
            with tf.name_scope('biases'):
                self.B2 = tf.Variable(tf.random_uniform(
                    [N_HIDDEN_2],
                    -1 / math.sqrt(N_HIDDEN_1 + action_size), 1 / math.sqrt(N_HIDDEN_1 + action_size)))
                DNN.variable_summaries(self.B2)
            with tf.name_scope('action_weights'):
                self.W2_action = tf.Variable(tf.random_uniform(
                    [action_size, N_HIDDEN_2],
                    -1 / math.sqrt(N_HIDDEN_1 + action_size), 1 / math.sqrt(N_HIDDEN_1 + action_size)))
                DNN.variable_summaries(self.W2_action)
            with tf.name_scope('pre_bn'):
                self.PBN2 = tf.matmul(self.A1, self.W2) + tf.matmul(self.action, self.W2_action)
                DNN.variable_summaries(self.PBN2)
            with tf.name_scope('pre_activation'):
                self.BN2 = batch_norm(self.PBN2, N_HIDDEN_2, self.is_training, self.sess)
                DNN.variable_summaries(self.BN2.bnorm)
            with tf.name_scope('activation'):
                self.A2 = self.activations[1](self.BN2.bnorm)  # + self.B2
                DNN.variable_summaries(self.A2)

        with tf.name_scope("Output_layer"):
            with tf.name_scope('weights'):
                self.W3 = tf.Variable(tf.random_uniform([N_HIDDEN_2, 1], -0.003, 0.003))
                DNN.variable_summaries(self.W3)
            with tf.name_scope('biases'):
                self.B3 = tf.Variable(tf.random_uniform([1], -0.003, 0.003))
                DNN.variable_summaries(self.B3)
            with tf.name_scope('activation'):
                self.Q_value = self.activations[2](tf.matmul(self.A2, self.W3) + self.B3)
                DNN.variable_summaries(self.Q_value)

        # Target Critic Q Network:
        self.t_state = tf.placeholder("float32", [None, state_size], name="t_state")
        self.t_action = tf.placeholder("float32", [None, action_size], name="t_action")

        with tf.name_scope("T_Layer_1"):
            with tf.name_scope('weights'):
                self.t_W1 = tf.Variable(tf.random_uniform(
                    [state_size, N_HIDDEN_1], -1 / math.sqrt(state_size), 1 / math.sqrt(state_size)))
            with tf.name_scope('biases'):
                self.t_B1 = tf.Variable(tf.random_uniform(
                    [N_HIDDEN_1], -1 / math.sqrt(state_size), 1 / math.sqrt(state_size)))
            with tf.name_scope('pre_bn'):
                self.t_PBN1 = tf.matmul(self.t_state, self.t_W1)
            with tf.name_scope('pre_activation'):
                self.t_BN1 = batch_norm(self.t_PBN1, N_HIDDEN_1, self.is_training, self.sess, self.BN1)
            with tf.name_scope('activation'):
                self.t_A1 = self.activations[0](self.t_BN1.bnorm)  # + self.t_B1

        with tf.name_scope("T_Layer_2"):
            with tf.name_scope('weights'):
                self.t_W2 = tf.Variable(tf.random_uniform(
                    [N_HIDDEN_1, N_HIDDEN_2],
                    -1 / math.sqrt(N_HIDDEN_1 + action_size), 1 / math.sqrt(N_HIDDEN_1 + action_size)))
                self.t_W2_action = tf.Variable(tf.random_uniform(
                    [action_size, N_HIDDEN_2],
                    -1 / math.sqrt(N_HIDDEN_1 + action_size), 1 / math.sqrt(N_HIDDEN_1 + action_size)))
            with tf.name_scope('biases'):
                self.t_B2 = tf.Variable(tf.random_uniform(
                    [N_HIDDEN_2],
                    -1 / math.sqrt(N_HIDDEN_1 + action_size), 1 / math.sqrt(N_HIDDEN_1 + action_size)))
            with tf.name_scope('pre_bn'):
                self.t_PBN2 = tf.matmul(self.t_A1, self.t_W2) + tf.matmul(self.t_action, self.t_W2_action)
            with tf.name_scope('pre_activation'):
                self.t_BN2 = batch_norm(self.t_PBN2, N_HIDDEN_2, self.is_training, self.sess, self.BN2)
            with tf.name_scope('activation'):
                self.t_A2 = self.activations[1](self.t_BN2.bnorm)  # + self.t_B2

        with tf.name_scope("T_Output_layer"):
            with tf.name_scope('weights'):
                self.t_W3 = tf.Variable(tf.random_uniform([N_HIDDEN_2, 1], -0.003, 0.003))
            with tf.name_scope('biases'):
                self.t_B3 = tf.Variable(tf.random_uniform([1], -0.003, 0.003))
            with tf.name_scope('activation'):
                self.t_Q_value = self.activations[2](tf.matmul(self.t_A2, self.t_W3) + self.t_B3)

        self.Q_obj = tf.placeholder("float32", [None, 1], name="obj_Q")  # supervisor
        self.learning_rate = tf.placeholder("float32", shape=[], name="learning_rate")

        # self.l2_regularizer_loss = tf.nn.l2_loss(self.W1) + tf.nn.l2_loss(self.W2) + tf.nn.l2_loss(self.W2_action) + tf.nn.l2_loss(self.W3) + tf.nn.l2_loss(self.B1) + tf.nn.l2_loss(self.B2) + tf.nn.l2_loss(self.B3)
        # self.l2_regularizer_loss = 0.0001*tf.reduce_mean( tf.square( self.W2 ) )
        with tf.name_scope("cost"):
            # self.cost = tf.pow( self.Q_value - self.Q_obj, 2 )/AC_BATCH_SIZE + self.l2_regularizer_loss  # /tf.to_float(tf.shape(self.Q_obj)[0])
            self.cost = tf.reduce_mean(tf.square(self.Q_value - self.Q_obj))

        self.optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(self.cost)
        self.act_grad_v = tf.gradients(self.Q_value, self.action)
        # this is just divided by batch size
        self.action_gradients = [self.act_grad_v[0] / tf.to_float(tf.shape(self.act_grad_v[0])[0])]
        # from simple actor net:
        self.check_fl = self.action_gradients

        # initialize all tensor variable parameters:
        self.sess.run(tf.global_variables_initializer())

        # To initialize critic and target with the same values:
        # copy target parameters
        self.sess.run([
            self.t_W1.assign(self.W1),
            self.t_B1.assign(self.B1),
            self.t_W2.assign(self.W2),
            self.t_W2_action.assign(self.W2_action),
            self.t_B2.assign(self.B2),
            self.t_W3.assign(self.W3),
            self.t_B3.assign(self.B3)
        ])

        self.update_target_critic_op = [
            self.t_W1.assign(TAU * self.W1 + (1 - TAU) * self.t_W1),
            self.t_B1.assign(TAU * self.B1 + (1 - TAU) * self.t_B1),
            self.t_W2.assign(TAU * self.W2 + (1 - TAU) * self.t_W2),
            self.t_W2_action.assign(TAU * self.W2_action + (1 - TAU) * self.t_W2_action),
            self.t_B2.assign(TAU * self.B2 + (1 - TAU) * self.t_B2),
            self.t_W3.assign(TAU * self.W3 + (1 - TAU) * self.t_W3),
            self.t_B3.assign(TAU * self.B3 + (1 - TAU) * self.t_B3),
            self.t_BN1.updateTarget,
            self.t_BN2.updateTarget
        ]

        net_path = "./model/cn"
        if self.write_sum > 0:
            self.saver = tf.train.Saver()
            self.saver.save(self.sess, net_path + "/net.ckpt")
            writer = tf.summary.FileWriter(net_path)
            writer.add_graph(self.sess.graph)
            writer.close()
            self.merged = tf.summary.merge_all()
            self.train_writer = tf.summary.FileWriter(net_path, self.sess.graph)
            self.run_metadata = tf.RunMetadata()