Example #1
	def __init__(self, state, state_dims, action_dims, action_bound_low, action_bound_high, dense1_size, dense2_size, final_layer_init, scope='actor'):
		# state - State input to pass through the network
		# action_bound_low/high - the network outputs in [-1, 1]; this is rescaled below to lie within the desired action-space bounds
		 
		
		self.state = state
		self.state_dims = np.prod(state_dims)       #Used to calculate the fan_in of the state layer (e.g. if state_dims is (3,2) fan_in should equal 6)
		self.action_dims = np.prod(action_dims)
		self.action_bound_low = action_bound_low
		self.action_bound_high = action_bound_high
		self.scope = scope
		 
		with tf.variable_scope(self.scope):
					
			self.dense1_mul = dense(self.state, dense1_size, weight_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(self.state_dims))), 1/tf.sqrt(tf.to_float(self.state_dims))),
								bias_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(self.state_dims))), 1/tf.sqrt(tf.to_float(self.state_dims))), scope='dense1')  
						 
			self.dense1 = relu(self.dense1_mul, scope='dense1')
			 
			self.dense2_mul = dense(self.dense1, dense2_size, weight_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(dense1_size))), 1/tf.sqrt(tf.to_float(dense1_size))),
								bias_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(dense1_size))), 1/tf.sqrt(tf.to_float(dense1_size))), scope='dense2')        
						 
			self.dense2 = relu(self.dense2_mul, scope='dense2')
			 
			self.output_mul = dense(self.dense2, self.action_dims, weight_init=tf.random_uniform_initializer(-1*final_layer_init, final_layer_init),
								bias_init=tf.random_uniform_initializer(-1*final_layer_init, final_layer_init), scope='output') 
			 
			self.output_tanh = tanh(self.output_mul, scope='output')
			 
			# Scale tanh output to lower and upper action bounds
			self.output = tf.multiply(0.5, tf.multiply(self.output_tanh, (self.action_bound_high-self.action_bound_low)) + (self.action_bound_high+self.action_bound_low))
			 
			
			self.network_params = tf.trainable_variables(scope=self.scope)
			self.bn_params = [] # No batch norm params
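
The last tf.multiply line is an affine map from the tanh range [-1, 1] onto [action_bound_low, action_bound_high]. A minimal NumPy check of that formula (the bounds here are hypothetical, not from the example):

# Minimal sketch (plain NumPy, not part of the example above) verifying that
# 0.5 * (tanh_out * (high - low) + (high + low)) maps [-1, 1] onto [low, high].
import numpy as np

low, high = -2.0, 0.5          # hypothetical action bounds
tanh_out = np.array([-1.0, 0.0, 1.0])

scaled = 0.5 * (tanh_out * (high - low) + (high + low))
print(scaled)                  # [-2.0, -0.75, 0.5] -> endpoints hit the bounds exactly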
Example #2
    def __init__(self, state, action, state_dims, action_dims, dense1_size, dense2_size, final_layer_init, num_atoms, v_min, v_max, is_training=False, scope='critic'):
        # state - State input to pass through the network
        # action - Action input for which the Z distribution should be predicted
        
        self.state = state
        self.action = action
        self.state_dims = np.prod(state_dims)       #Used to calculate the fan_in of the state layer (e.g. if state_dims is (3,2) fan_in should equal 6)
        self.action_dims = np.prod(action_dims)
        self.is_training = is_training
        self.scope = scope    

        
        with tf.variable_scope(self.scope):
            self.input_norm = batchnorm(self.state, self.is_training, scope='input_norm')
           
            self.dense1_mul = dense(self.input_norm, dense1_size, weight_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(self.state_dims))), 1/tf.sqrt(tf.to_float(self.state_dims))),
                                bias_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(self.state_dims))), 1/tf.sqrt(tf.to_float(self.state_dims))), scope='dense1')  
            
            self.dense1_bn = batchnorm(self.dense1_mul, self.is_training, scope='dense1')
            
            self.dense1 = relu(self.dense1_bn, scope='dense1')
            
            #Merge first dense layer with action input to get second dense layer            
            self.dense2a = dense(self.dense1, dense2_size, weight_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))), 1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))),
                                bias_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))), 1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))), scope='dense2a')        
            
            self.dense2b = dense(self.action, dense2_size, weight_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))), 1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))),
                                bias_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))), 1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))), scope='dense2b') 
            
            self.dense2 = relu(self.dense2a + self.dense2b, scope='dense2')
            
            self.output_logits = dense(self.dense2, num_atoms, weight_init=tf.random_uniform_initializer(-1*final_layer_init, final_layer_init),
                                       bias_init=tf.random_uniform_initializer(-1*final_layer_init, final_layer_init), scope='output_logits')  
            
            self.output_probs = softmax(self.output_logits, scope='output_probs')
                         
                          
            self.network_params = tf.trainable_variables(scope=self.scope)
            self.bn_params = [v for v in tf.global_variables(scope=self.scope) if 'batch_normalization/moving' in v.name]
            
            
            self.z_atoms = tf.lin_space(v_min, v_max, num_atoms)
            
            self.Q_val = tf.reduce_sum(self.z_atoms * self.output_probs) # the Q value is the mean of the categorical output Z-distribution (note: with no axis argument this sums over the batch as well; pass axis=1 for per-sample Q values)
          
            self.action_grads = tf.gradients(self.output_probs, self.action, self.z_atoms) # gradient of the mean of the output Z-distribution w.r.t. the action input - used to train the actor network; weighting the grads by z_atoms gives the mean across the output distribution
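
Q_val is the expectation of the categorical Z-distribution over the fixed support z_atoms. A minimal NumPy sketch of that expectation (illustrative values only), using axis=1 to keep one Q value per batch row:

# Minimal NumPy sketch (illustrative values) of the expectation that Q_val
# computes: Q = sum_i z_i * p_i over the fixed support z_atoms.
import numpy as np

v_min, v_max, num_atoms = -10.0, 10.0, 5
z_atoms = np.linspace(v_min, v_max, num_atoms)     # [-10, -5, 0, 5, 10]

probs = np.array([[0.1, 0.2, 0.4, 0.2, 0.1],       # batch of two softmax outputs
                  [0.0, 0.0, 0.0, 0.5, 0.5]])

q = np.sum(z_atoms * probs, axis=1)                # per-sample expectation
print(q)                                           # [0.0, 7.5]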
Example #3
	def __init__(self, state, action, noise, state_dims, action_dims, noise_dims, dense1_size, dense2_size, final_layer_init, num_atoms, v_min, v_max, is_training=False, scope='critic'):
		# state - State input to pass through the network
		# action - Action input for which the Z distribution should be predicted
		
		self.state = state
		self.action = action
		self.noise = noise
		self.noise_dims = np.prod(noise_dims)
		self.state_dims = np.prod(state_dims)       #Used to calculate the fan_in of the state layer (e.g. if state_dims is (3,2) fan_in should equal 6)
		self.action_dims = np.prod(action_dims)
		self.v_min = v_min
		self.v_max = v_max
		self.num_atoms = num_atoms
		self.scope = scope
		self.is_training = is_training
		batch_size = 256  # NOTE: hard-coded; the reshape ops below assume this exact batch size

		with tf.variable_scope(self.scope):           
			self.dense1_mul = dense(self.state, dense1_size, weight_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(self.state_dims))), 1/tf.sqrt(tf.to_float(self.state_dims))),
								bias_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(self.state_dims))), 1/tf.sqrt(tf.to_float(self.state_dims))), scope='dense1')  
						 
			self.dense1 = relu(self.dense1_mul, scope='dense1')
			 
			#Merge first dense layer with action and noise input to get second dense layer            
			self.dense2a = dense(self.dense1, dense2_size, weight_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))), 1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))),
								bias_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))), 1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))), scope='dense2a')        
			 
			self.dense2a = tf.reshape(self.dense2a, [batch_size, 1 , dense2_size])
			
			self.dense2b = dense(self.action, dense2_size, weight_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))), 1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))),
								bias_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))), 1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))), scope='dense2b')
			
			self.dense2b = tf.reshape(self.dense2b, [batch_size, 1 , dense2_size])
			
			self.noise = tf.reshape(self.noise, [batch_size*num_atoms , noise_dims])
			
			# TODO: decide whether this layer needs to be modified
			self.dense2c = dense(self.noise, dense2_size, weight_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(dense1_size+self.noise_dims))), 1/tf.sqrt(tf.to_float(dense1_size+self.noise_dims))),
								bias_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(dense1_size+self.noise_dims))), 1/tf.sqrt(tf.to_float(dense1_size+self.noise_dims))), scope='dense2c') 
						   
			
			self.dense2c = tf.reshape(self.dense2c, [batch_size, num_atoms , dense2_size])
			
			self.dense2 = relu(self.dense2a + self.dense2b + self.dense2c, scope='dense2')
			
			self.dense2 = tf.reshape(self.dense2, [batch_size*num_atoms, dense2_size])
			
						  
			self.output_mul = dense(self.dense2, 1, weight_init=tf.random_uniform_initializer(-1*final_layer_init, final_layer_init),
									   bias_init=tf.random_uniform_initializer(-1*final_layer_init, final_layer_init), scope='output')  
			
			#self.output_tanh = tanh(0.05*self.output_mul, scope='output')
			
			#self.output_samples = tf.multiply(0.5, tf.multiply(self.output_tanh, (self.v_max - self.v_min)) + (self.v_max + self.v_min))
			
			self.output_samples = self.output_mul
			
			self.output_samples = tf.reshape(self.output_samples, [batch_size, num_atoms])
						 
						  
			self.network_params = tf.trainable_variables(scope=self.scope)
			self.bn_params = [v for v in tf.global_variables(scope=self.scope) if 'batch_normalization/moving' in v.name]
			
			self.CVaR_value = CVaR_sample(self.output_samples, train_params.CVaR_alpha, train_params.CVaR_optimizing)
			self.Q_val = tf.reduce_mean(self.output_samples, axis=1) # the Q value is the mean of the generated samples
		  
			# TODO add utility function HERE!
			self.action_grads = tf.gradients(self.CVaR_value, self.action) # gradient of the CVaR of the output samples w.r.t. the action input - used to train the actor network
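
CVaR_sample is an external helper (its exact behavior depends on train_params). A minimal NumPy sketch of one common sample-based CVaR estimate, assuming the lower tail of the return samples is the risk being measured:

# Minimal sketch (assumption: lower-tail CVaR; the real CVaR_sample helper may
# differ): mean of the worst alpha-fraction of samples, per batch row.
import numpy as np

def cvar_from_samples(samples, alpha):
    """Mean of the worst alpha-fraction of samples in each row."""
    sorted_samples = np.sort(samples, axis=1)          # ascending: worst first
    k = max(1, int(np.ceil(alpha * samples.shape[1])))
    return sorted_samples[:, :k].mean(axis=1)

samples = np.array([[1.0, -3.0, 2.0, 0.5]])
print(cvar_from_samples(samples, alpha=0.25))          # [-3.0] -> worst 25%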
Example #4
	def __init__(self, state, action, noise, state_dims, action_dims, noise_dims, dense1_size, dense2_size, final_layer_init, num_atoms, v_min, v_max, scope='critic'):
		# state - State input to pass through the network
		# action - Action input for which the Z distribution should be predicted
		 
		self.state = state
		self.action = action
		self.noise = noise
		self.noise_dims = np.prod(noise_dims)
		self.state_dims = np.prod(state_dims)       #Used to calculate the fan_in of the state layer (e.g. if state_dims is (3,2) fan_in should equal 6)
		self.action_dims = np.prod(action_dims)
		self.v_min = v_min
		self.v_max = v_max
		self.scope = scope  
		batch_size = 256   
		 
		with tf.variable_scope(self.scope):           
			self.dense1_mul = dense(self.state, dense1_size, weight_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(self.state_dims))), 1/tf.sqrt(tf.to_float(self.state_dims))),
								bias_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(self.state_dims))), 1/tf.sqrt(tf.to_float(self.state_dims))), scope='dense1')  
						 
			self.dense1 = relu(self.dense1_mul, scope='dense1')
			 
			#Merge first dense layer with action and noise input to get second dense layer            
			self.dense2a = dense(self.dense1, dense2_size, weight_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))), 1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))),
								bias_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))), 1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))), scope='dense2a')        
			 
			self.dense2a = tf.reshape(self.dense2a, [batch_size, 1 , dense2_size])
			
			self.dense2b = dense(self.action, dense2_size, weight_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))), 1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))),
								bias_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))), 1/tf.sqrt(tf.to_float(dense1_size+self.action_dims))), scope='dense2b')
			
			self.dense2b = tf.reshape(self.dense2b, [batch_size, 1 , dense2_size])
			
			self.noise = tf.reshape(self.noise, [batch_size*num_atoms , noise_dims])
			
			self.dense2c = dense(self.noise, dense2_size, weight_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(dense1_size+self.noise_dims))), 1/tf.sqrt(tf.to_float(dense1_size+self.noise_dims))),
								bias_init=tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(dense1_size+self.noise_dims))), 1/tf.sqrt(tf.to_float(dense1_size+self.noise_dims))), scope='dense2c') 
						   
			
			self.dense2c = tf.reshape(self.dense2c, [batch_size, num_atoms , dense2_size])
			
			self.dense2 = relu(self.dense2a + self.dense2b + self.dense2c, scope='dense2')
			
			self.dense2 = tf.reshape(self.dense2, [batch_size*num_atoms, dense2_size])
			
						  
			self.output_mul = dense(self.dense2, 1, weight_init=tf.random_uniform_initializer(-1*final_layer_init, final_layer_init),
									   bias_init=tf.random_uniform_initializer(-1*final_layer_init, final_layer_init), scope='output')  
			
			self.output_tanh = tanh(0.2*self.output_mul, scope='output')
			
			self.output_samples = tf.multiply(0.5, tf.multiply(self.output_tanh, (self.v_max - self.v_min)) + (self.v_max + self.v_min))
			
			self.output_samples = tf.reshape(self.output_samples, [batch_size, num_atoms])
						 
						  
			self.network_params = tf.trainable_variables(scope=self.scope)
			self.bn_params = [] # No batch norm params
			
			self.Q_val = tf.reduce_mean(self.output_samples, axis=1) # the Q value is the mean of the generated samples
		  
			self.action_grads = tf.gradients(self.output_samples/num_atoms, self.action) # gradient of the mean of the output samples w.r.t. the action input - used to train the actor network (tf.gradients sums over all outputs, so dividing by num_atoms yields the gradient of the mean)
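
Dividing output_samples by num_atoms before tf.gradients works because tf.gradients sums the contributions of every output element, so the sum of the scaled gradients equals the gradient of the sample mean (the Q_val above). A small check of that identity (assuming a TF 1.x runtime like the examples):

# Sketch (assumption: TF 1.x runtime) showing that tf.gradients(y / n, x)
# equals the gradient of the per-column mean of y when y has n rows,
# because tf.gradients sums over all elements of y.
import tensorflow as tf

x = tf.constant([1.0, 2.0])
y = tf.stack([2.0 * x, 3.0 * x])             # 2 "atoms" per input element

g_sum = tf.gradients(y / 2.0, x)[0]          # the trick used in the example
g_mean = tf.gradients(tf.reduce_mean(y, axis=0), x)[0]

with tf.Session() as sess:
    print(sess.run([g_sum, g_mean]))         # both [2.5, 2.5]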
Example #5
    def __init__(self,
                 state,
                 action,
                 state_dims,
                 action_dims,
                 args,
                 is_training=False,
                 scope='critic'):
        # state - State input to pass through the network
        # action - Action input for which the Q value should be predicted

        self.state = state
        self.action = action
        self.state_dims = np.prod(
            state_dims
        )  #Used to calculate the fan_in of the state layer (e.g. if state_dims is (3,2) fan_in should equal 6)
        self.action_dims = np.prod(action_dims)
        self.args = args
        self.is_training = is_training
        self.scope = scope

        # Networks params
        dense1_size = self.args.dense1_size
        dense2_size = self.args.dense2_size
        final_layer_init = self.args.final_layer_init

        with tf.variable_scope(self.scope):
            self.input_norm = batchnorm(self.state,
                                        self.is_training,
                                        scope='input_norm')

            self.dense1_mul = dense(
                self.input_norm,
                dense1_size,
                weight_init=tf.random_uniform_initializer(
                    (-1 / tf.sqrt(tf.to_float(self.state_dims))),
                    1 / tf.sqrt(tf.to_float(self.state_dims))),
                bias_init=tf.random_uniform_initializer(
                    (-1 / tf.sqrt(tf.to_float(self.state_dims))),
                    1 / tf.sqrt(tf.to_float(self.state_dims))),
                scope='dense1')

            self.dense1_bn = batchnorm(self.dense1_mul,
                                       self.is_training,
                                       scope='dense1')

            self.dense1 = relu(self.dense1_bn, scope='dense1')

            #Merge first dense layer with action input to get second dense layer
            self.dense2a = dense(
                self.dense1,
                dense2_size,
                weight_init=tf.random_uniform_initializer(
                    (-1 /
                     tf.sqrt(tf.to_float(dense1_size + self.action_dims))),
                    1 / tf.sqrt(tf.to_float(dense1_size + self.action_dims))),
                bias_init=tf.random_uniform_initializer(
                    (-1 /
                     tf.sqrt(tf.to_float(dense1_size + self.action_dims))),
                    1 / tf.sqrt(tf.to_float(dense1_size + self.action_dims))),
                scope='dense2a')

            self.dense2b = dense(
                self.action,
                dense2_size,
                weight_init=tf.random_uniform_initializer(
                    (-1 /
                     tf.sqrt(tf.to_float(dense1_size + self.action_dims))),
                    1 / tf.sqrt(tf.to_float(dense1_size + self.action_dims))),
                bias_init=tf.random_uniform_initializer(
                    (-1 /
                     tf.sqrt(tf.to_float(dense1_size + self.action_dims))),
                    1 / tf.sqrt(tf.to_float(dense1_size + self.action_dims))),
                scope='dense2b')

            self.dense2 = relu(self.dense2a + self.dense2b, scope='dense2')

            self.output = dense(self.dense2,
                                1,
                                weight_init=tf.random_uniform_initializer(
                                    -1 * final_layer_init, final_layer_init),
                                bias_init=tf.random_uniform_initializer(
                                    -1 * final_layer_init, final_layer_init),
                                scope='output')

            self.network_params = tf.trainable_variables(scope=self.scope)

            self.action_grads = tf.gradients(
                self.output, self.action
            )  # Gradient of value output wrt action input - used to train actor network
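
action_grads feeds the standard DDPG actor update. A hedged sketch (the `actor` and `critic` objects and the learning rate are hypothetical, assuming TF 1.x) of how those gradients are typically chained into the actor parameters via grad_ys:

# Hypothetical wiring (not from the example): the chain rule
# dQ/dtheta = dQ/da * da/dtheta is applied by passing dQ/da as grad_ys;
# the minus sign turns gradient descent into ascent on Q.
actor_grads = tf.gradients(actor.output, actor.network_params,
                           grad_ys=-critic.action_grads[0])
train_op = tf.train.AdamOptimizer(1e-4).apply_gradients(
    zip(actor_grads, actor.network_params))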
Example #6
def generator(input_, angles, reuse=False):
    """ Generator.

    Parameters
    ----------
    input_: tensor, input images.
    angles: tensor, target gaze direction.
    reuse: bool, reuse the net if True.

    Returns
    -------
    x: tensor, generated image.

    """

    channel = 64
    style_dim = angles.get_shape().as_list()[-1]

    angles_reshaped = tf.reshape(angles, [-1, 1, 1, style_dim])
    angles_tiled = tf.tile(angles_reshaped,
                           [1, tf.shape(input_)[1],
                            tf.shape(input_)[2], 1])
    x = tf.concat([input_, angles_tiled], axis=3)

    with tf.compat.v1.variable_scope('generator', reuse=reuse):

        # input layer
        x = conv2d(x,
                   channel,
                   d_h=1,
                   d_w=1,
                   scope='conv2d_input',
                   use_bias=False,
                   pad=3,
                   conv_filters_dim=7)
        x = instance_norm(x, scope='in_input')
        x = relu(x)

        # encoder
        for i in range(2):

            x = conv2d(x,
                       2 * channel,
                       d_h=2,
                       d_w=2,
                       scope='conv2d_%d' % i,
                       use_bias=False,
                       pad=1,
                       conv_filters_dim=4)
            x = instance_norm(x, scope='in_conv_%d' % i)
            x = relu(x)
            channel = 2 * channel

        # bottleneck
        for i in range(6):

            x_a = conv2d(x,
                         channel,
                         conv_filters_dim=3,
                         d_h=1,
                         d_w=1,
                         pad=1,
                         use_bias=False,
                         scope='conv_res_a_%d' % i)
            x_a = instance_norm(x_a, 'in_res_a_%d' % i)
            x_a = relu(x_a)
            x_b = conv2d(x_a,
                         channel,
                         conv_filters_dim=3,
                         d_h=1,
                         d_w=1,
                         pad=1,
                         use_bias=False,
                         scope='conv_res_b_%d' % i)
            x_b = instance_norm(x_b, 'in_res_b_%d' % i)

            x = x + x_b

        # decoder
        for i in range(2):

            x = deconv2d(x,
                         int(channel / 2),
                         conv_filters_dim=4,
                         d_h=2,
                         d_w=2,
                         use_bias=False,
                         scope='deconv_%d' % i)
            x = instance_norm(x, scope='in_decon_%d' % i)
            x = relu(x)
            channel = int(channel / 2)

        x = conv2d(x,
                   3,
                   conv_filters_dim=7,
                   d_h=1,
                   d_w=1,
                   pad=3,
                   use_bias=False,
                   scope='output')
        x = tanh(x)

    return x
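
The conditioning trick at the top of generator() tiles the gaze-angle vector across every spatial position and appends it as extra channels before the first convolution. A minimal sketch with toy shapes (TF 1.x-style ops, values are illustrative):

# Sketch (assumption: TF 1.x API, toy shapes) of the conditioning step:
# broadcast a [batch, style_dim] vector to [batch, H, W, style_dim] and
# concatenate it onto the image channels.
import tensorflow as tf

images = tf.zeros([4, 64, 64, 3])                      # NHWC batch
angles = tf.constant([[0.1, -0.2]] * 4)                # [batch, style_dim]

a = tf.reshape(angles, [-1, 1, 1, 2])
a = tf.tile(a, [1, tf.shape(images)[1], tf.shape(images)[2], 1])
x = tf.concat([images, a], axis=3)                     # [4, 64, 64, 5]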
Example #7
def generator():
    """ Generator (self-contained variant).

    Takes no arguments; the input placeholders are created inside
    and returned alongside the output.

    Returns
    -------
    x: tensor, generated image.
    angles: placeholder, target gaze direction.
    input_: placeholder, input images.

    """

    load_size = 64  # input image size; also reused below as the running channel count
    style_dim = 2

    angles = tf.compat.v1.placeholder(tf.float32, shape=[None, 2])
    input_ = tf.compat.v1.placeholder(tf.float32,
                                      shape=[None, load_size, load_size, 3])

    angles_reshaped = tf.reshape(angles, [-1, 1, 1, style_dim])
    angles_tiled = tf.tile(angles_reshaped,
                           [1, tf.shape(input_)[1],
                            tf.shape(input_)[2], 1])
    x = tf.concat([input_, angles_tiled], axis=3)

    with tf.compat.v1.variable_scope("generator", reuse=False):

        # input layer
        x = conv2d(
            x,
            load_size,
            d_h=1,
            d_w=1,
            scope="conv2d_input",
            use_bias=False,
            pad=3,
            conv_filters_dim=7,
        )
        x = instance_norm(x, scope="in_input")
        x = relu(x)

        # encoder
        for i in range(2):

            x = conv2d(
                x,
                2 * load_size,
                d_h=2,
                d_w=2,
                scope="conv2d_%d" % i,
                use_bias=False,
                pad=1,
                conv_filters_dim=4,
            )
            x = instance_norm(x, scope="in_conv_%d" % i)
            x = relu(x)
            load_size = 2 * load_size

        # bottleneck
        for i in range(6):

            x_a = conv2d(
                x,
                load_size,
                conv_filters_dim=3,
                d_h=1,
                d_w=1,
                pad=1,
                use_bias=False,
                scope="conv_res_a_%d" % i,
            )
            x_a = instance_norm(x_a, "in_res_a_%d" % i)
            x_a = relu(x_a)
            x_b = conv2d(
                x_a,
                load_size,
                conv_filters_dim=3,
                d_h=1,
                d_w=1,
                pad=1,
                use_bias=False,
                scope="conv_res_b_%d" % i,
            )
            x_b = instance_norm(x_b, "in_res_b_%d" % i)

            x = x + x_b

        # decoder
        for i in range(2):

            x = deconv2d(
                x,
                int(load_size / 2),
                conv_filters_dim=4,
                d_h=2,
                d_w=2,
                use_bias=False,
                scope="deconv_%d" % i,
            )
            x = instance_norm(x, scope="in_decon_%d" % i)
            x = relu(x)
            load_size = int(load_size / 2)

        x = conv2d(
            x,
            3,
            conv_filters_dim=7,
            d_h=1,
            d_w=1,
            pad=3,
            use_bias=False,
            scope="output",
        )
        x = tanh(x)

    return x, angles, input_
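
Since this variant owns its placeholders, callers only need the returned handles. A hypothetical usage sketch (assuming the conv2d/instance_norm/relu/tanh/deconv2d helpers are importable and a TF 1.x session):

# Hypothetical usage of the placeholder-returning generator() above
# (assumption: the helper ops it calls are in scope and TF 1.x graph mode).
import numpy as np
import tensorflow as tf

out, angles_ph, input_ph = generator()
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    img = sess.run(out, feed_dict={input_ph: np.zeros((1, 64, 64, 3), np.float32),
                                   angles_ph: np.array([[0.0, 0.5]], np.float32)})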
Example #8
    def __init__(self,
                 state,
                 audio,
                 state_dims,
                 action_dims,
                 action_bound_low,
                 action_bound_high,
                 args,
                 is_training=False,
                 scope='actor'):
        # state - State input to pass through the network
        # action_bound_low/high - the network outputs in [-1, 1]; this is rescaled below to lie within the desired action-space bounds

        self.state = state
        self.audio = audio
        self.state_dims = np.prod(
            state_dims
        )  #Used to calculate the fan_in of the state layer (e.g. if state_dims is (3,2) fan_in should equal 6)
        self.action_dims = np.prod(action_dims)
        self.action_bound_low = action_bound_low
        self.action_bound_high = action_bound_high
        self.args = args
        self.is_training = is_training
        self.scope = scope

        # Networks params
        dense1_size = self.args.dense1_size
        dense2_size = self.args.dense2_size
        final_layer_init = self.args.final_layer_init

        with tf.variable_scope(self.scope):
            self.fc1_a = tf.layers.dense(self.audio, 50, tf.nn.relu)
            self.fc2_a = tf.layers.dense(self.fc1_a, 50, tf.nn.relu)
            self.final_flat = tf.concat(
                [createNetwork_cnn(self.state), self.fc2_a], 1)
            # self.state_dims = np.prod(self.final_flat)
            # print(np.shape(self.final_flat), self.state_dims)

            self.input_norm = batchnorm(self.final_flat,
                                        self.is_training,
                                        scope='input_norm')

            self.dense1_mul = dense(
                self.input_norm,
                dense1_size,
                weight_init=tf.random_uniform_initializer(
                    (-1 / tf.sqrt(tf.to_float(self.state_dims))),
                    1 / tf.sqrt(tf.to_float(self.state_dims))),
                bias_init=tf.random_uniform_initializer(
                    (-1 / tf.sqrt(tf.to_float(self.state_dims))),
                    1 / tf.sqrt(tf.to_float(self.state_dims))),
                scope='dense1')

            self.dense1_bn = batchnorm(self.dense1_mul,
                                       self.is_training,
                                       scope='dense1')

            self.dense1 = relu(self.dense1_bn, scope='dense1')

            self.dense2_mul = dense(
                self.dense1,
                dense2_size,
                weight_init=tf.random_uniform_initializer(
                    (-1 / tf.sqrt(tf.to_float(dense1_size))),
                    1 / tf.sqrt(tf.to_float(dense1_size))),
                bias_init=tf.random_uniform_initializer(
                    (-1 / tf.sqrt(tf.to_float(dense1_size))),
                    1 / tf.sqrt(tf.to_float(dense1_size))),
                scope='dense2')

            self.dense2_bn = batchnorm(self.dense2_mul,
                                       self.is_training,
                                       scope='dense2')

            self.dense2 = relu(self.dense2_bn, scope='dense2')

            self.output_mul = dense(self.dense2,
                                    self.action_dims,
                                    weight_init=tf.random_uniform_initializer(
                                        -1 * final_layer_init,
                                        final_layer_init),
                                    bias_init=tf.random_uniform_initializer(
                                        -1 * final_layer_init,
                                        final_layer_init),
                                    scope='output')

            self.output_tanh = tanh(self.output_mul, scope='output')

            # Scale tanh output to lower and upper action bounds
            self.output = tf.multiply(
                0.5,
                tf.multiply(self.output_tanh,
                            (self.action_bound_high - self.action_bound_low)) +
                (self.action_bound_high + self.action_bound_low))

            self.network_params = tf.trainable_variables(scope=self.scope)
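
The batchnorm layers above maintain moving averages (collected into bn_params in the critic examples); in TF 1.x those averages are only updated if the UPDATE_OPS collection is run alongside the train op. A minimal sketch of that pattern (assuming batchnorm wraps tf.layers.batch_normalization; the loss here is hypothetical):

# Sketch (assumption: batchnorm() wraps tf.layers.batch_normalization, as the
# 'batch_normalization/moving' name filters elsewhere suggest) of the TF 1.x
# pattern that keeps moving averages updated during training.
import tensorflow as tf

x = tf.compat.v1.placeholder(tf.float32, [None, 8])
h = tf.compat.v1.layers.batch_normalization(x, training=True)
loss = tf.reduce_mean(tf.square(h))                    # hypothetical loss

update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):              # run moving-average updates
    train_op = tf.compat.v1.train.AdamOptimizer(1e-4).minimize(loss)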