Exemple #1
0
	def __init__(self, state, state_dims, action_dims, action_bound_low, action_bound_high, dense1_size, dense2_size, final_layer_init, scope='actor'):
		"""Build the actor graph: two dense+relu layers followed by a dense+tanh output rescaled to the action bounds.

		state - State input to pass through the network
		state_dims, action_dims - Dimensions of the state/action; only their products are stored
		action_bound_low, action_bound_high - Bounds the tanh output is rescaled with
		dense1_size, dense2_size - Size arguments passed to the two hidden dense layers
		final_layer_init - Bound of the uniform initializer used for the output layer
		scope - Variable scope under which the whole network is created
		"""

		def fan_in_uniform(fan_in):
			# New uniform initializer over [-1/sqrt(fan_in), 1/sqrt(fan_in)]
			return tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(fan_in))), 1/tf.sqrt(tf.to_float(fan_in)))

		def final_uniform():
			# New uniform initializer over [-final_layer_init, final_layer_init]
			return tf.random_uniform_initializer(-1*final_layer_init, final_layer_init)

		self.scope = scope
		self.state = state
		# Products are used as fan_in values (e.g. state_dims of (3,2) gives 6)
		self.state_dims = np.prod(state_dims)
		self.action_dims = np.prod(action_dims)
		self.action_bound_low = action_bound_low
		self.action_bound_high = action_bound_high

		with tf.variable_scope(self.scope):
			# First hidden layer - initialised from the flattened state size
			self.dense1_mul = dense(self.state, dense1_size, weight_init=fan_in_uniform(self.state_dims),
								bias_init=fan_in_uniform(self.state_dims), scope='dense1')
			self.dense1 = relu(self.dense1_mul, scope='dense1')

			# Second hidden layer - initialised from the size of the first layer
			self.dense2_mul = dense(self.dense1, dense2_size, weight_init=fan_in_uniform(dense1_size),
								bias_init=fan_in_uniform(dense1_size), scope='dense2')
			self.dense2 = relu(self.dense2_mul, scope='dense2')

			# Output layer - one unit per action dimension
			self.output_mul = dense(self.dense2, self.action_dims, weight_init=final_uniform(),
								bias_init=final_uniform(), scope='output')
			self.output_tanh = tanh(self.output_mul, scope='output')

			# output = 0.5 * (tanh * (high - low) + (high + low))
			bound_range = self.action_bound_high-self.action_bound_low
			stretched = tf.multiply(self.output_tanh, bound_range)
			shifted = stretched + (self.action_bound_high+self.action_bound_low)
			self.output = tf.multiply(0.5, shifted)

			self.network_params = tf.trainable_variables(scope=self.scope)
			self.bn_params = [] # No batch norm params
Exemple #2
0
 def build_bottom_block(self, inputs, name):
     """Flatten `inputs` and apply the classification head.

     The head is two rounds of `ops.dense` (size argument 512, relu
     activation) each followed by `ops.dropout` (argument 0.5), then a
     final `ops.dense` with `self.conf.class_num` outputs and a softmax
     activation. Every scope name is prefixed with `name`.
     """
     hidden = tf.contrib.layers.flatten(inputs, scope=name + '/flat')
     for suffix in ('1', '2'):
         hidden = ops.dense(hidden, 512, name + '/dense' + suffix,
                            activation_fn=tf.nn.relu)
         hidden = ops.dropout(hidden, 0.5, name + '/dropout' + suffix)
     return ops.dense(hidden, self.conf.class_num, name + '/dense_output',
                      activation_fn=tf.nn.softmax)
    def __init__(self, num_actions, state, action=None, target=None,
                 learning_rate=None, scope='DQN'):
        """Build the DQN graph: input scaling, three conv layers, two dense layers.

        num_actions   - Size of the output layer
        state         - Input state to pass through the network
        action        - Action for which the Q value should be predicted (only required for training)
        target        - Target Q value (only required for training)
        learning_rate - When not None, an RMSProp optimizer is created with it
        scope         - Variable scope under which the network is created
        """
        self.scope = scope
        self.num_actions = num_actions
        self.input = state
        self.action = action
        self.target = target

        # self.optimizer only exists when a learning rate was supplied
        if learning_rate is not None:
            self.optimizer = tf.train.RMSPropOptimizer(
                learning_rate, momentum=0.95, epsilon=0.01)

        with tf.variable_scope(self.scope):

            # Cast the input to float and divide it by 255.0
            with tf.variable_scope('input_layers'):
                self.input_float = tf.to_float(self.input)
                self.input_norm = tf.divide(self.input_float, 255.0)

            # Convolutional stack, each layer using tf.nn.relu
            self.conv1 = conv2d(self.input_norm, 8, 32, 4, tf.nn.relu, scope='conv1')
            self.conv2 = conv2d(self.conv1, 4, 64, 2, tf.nn.relu, scope='conv2')
            self.conv3 = conv2d(self.conv2, 3, 64, 1, tf.nn.relu, scope='conv3')

            # Dense head producing one output per action
            self.flatten = flatten(self.conv3, scope='flatten')
            self.dense = dense(self.flatten, 512, tf.nn.relu, scope='dense')
            self.output = dense(self.dense, self.num_actions, scope='output')

        self.network_params = tf.trainable_variables(scope=self.scope)
Exemple #4
0
    def __init__(self, state, action, state_dims, action_dims, dense1_size, dense2_size, final_layer_init, num_atoms, v_min, v_max, is_training=False, scope='critic'):
        """Build the distributional critic graph (batch-normalised, categorical output over atoms).

        state - State input to pass through the network
        action - Action input for which the Z distribution should be predicted
        state_dims, action_dims - Dimensions of state/action; only their products are stored
        dense1_size, dense2_size - Size arguments passed to the hidden dense layers
        final_layer_init - Bound of the uniform initializer used for the logits layer
        num_atoms, v_min, v_max - Number of atoms and end points given to tf.lin_space
        is_training - Flag forwarded to every batchnorm call
        scope - Variable scope under which the network is created
        """

        def fan_in_uniform(fan_in):
            # New uniform initializer over [-1/sqrt(fan_in), 1/sqrt(fan_in)]
            return tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(fan_in))), 1/tf.sqrt(tf.to_float(fan_in)))

        def final_uniform():
            # New uniform initializer over [-final_layer_init, final_layer_init]
            return tf.random_uniform_initializer(-1*final_layer_init, final_layer_init)

        self.scope = scope
        self.is_training = is_training
        self.state = state
        self.action = action
        # Products are used as fan_in values (e.g. state_dims of (3,2) gives 6)
        self.state_dims = np.prod(state_dims)
        self.action_dims = np.prod(action_dims)

        with tf.variable_scope(self.scope):
            # Normalise the raw state before the first layer
            self.input_norm = batchnorm(self.state, self.is_training, scope='input_norm')

            # First layer: dense -> batchnorm -> relu
            self.dense1_mul = dense(self.input_norm, dense1_size, weight_init=fan_in_uniform(self.state_dims),
                                bias_init=fan_in_uniform(self.state_dims), scope='dense1')
            self.dense1_bn = batchnorm(self.dense1_mul, self.is_training, scope='dense1')
            self.dense1 = relu(self.dense1_bn, scope='dense1')

            # Second layer: separate dense projections of the first layer and of the action are summed before the relu
            self.dense2a = dense(self.dense1, dense2_size, weight_init=fan_in_uniform(dense1_size+self.action_dims),
                                bias_init=fan_in_uniform(dense1_size+self.action_dims), scope='dense2a')
            self.dense2b = dense(self.action, dense2_size, weight_init=fan_in_uniform(dense1_size+self.action_dims),
                                bias_init=fan_in_uniform(dense1_size+self.action_dims), scope='dense2b')
            self.dense2 = relu(self.dense2a + self.dense2b, scope='dense2')

            # One logit per atom, turned into probabilities
            self.output_logits = dense(self.dense2, num_atoms, weight_init=final_uniform(),
                                       bias_init=final_uniform(), scope='output_logits')
            self.output_probs = softmax(self.output_logits, scope='output_probs')

            self.network_params = tf.trainable_variables(scope=self.scope)
            # Global variables in this scope whose name contains 'batch_normalization/moving'
            self.bn_params = [var for var in tf.global_variables(scope=self.scope) if 'batch_normalization/moving' in var.name]

            # Evenly spaced atom values between v_min and v_max
            self.z_atoms = tf.lin_space(v_min, v_max, num_atoms)

            # Probability-weighted sum of the atom values.
            # NOTE(review): reduce_sum is called without an axis, so every dimension
            # (including any batch dimension) is collapsed into one scalar - confirm this is intended.
            self.Q_val = tf.reduce_sum(self.z_atoms * self.output_probs)

            # Gradient of the output probabilities wrt the action input, with z_atoms passed as grad_ys
            # so each probability's gradient is weighted by its atom value - used to train the actor network
            self.action_grads = tf.gradients(self.output_probs, self.action, self.z_atoms)
Exemple #5
0
 def inference(self, matrix, outs):
     """Run the graph-conv / pooling stack and the dense head; return its output.

     After an initial `ops.simple_conv`, each of `self.conf.l_num` levels
     applies one `ops.simple_conv` (channel multiplier 4 // 2**level),
     keeps that result, then calls `ops.graph_pool` on `matrix` and the
     features. The kept results are max-reduced over axis 1, concatenated
     on axis 1 and passed through two `ops.dense` layers.
     """
     outs = ops.simple_conv(matrix, outs, 4 * self.conf.ch_num,
                            self.conf.rate, 'conv0', 3)

     level_feats = []
     for level in range(self.conf.l_num):
         multiplier = 4 // 2**level
         outs = ops.simple_conv(matrix, outs, multiplier * self.conf.ch_num,
                                self.conf.rate, 'conv1_%s' % level, 3)
         level_feats.append(outs)
         matrix, outs = ops.graph_pool(matrix, outs, 2, 'pool_%s' % level)

     # Collapse axis 1 of every kept feature tensor, then join them
     pooled = [
         tf.reduce_max(feat, axis=1, name='max_pool_%s' % idx)
         for idx, feat in enumerate(level_feats)
     ]
     merged = tf.concat(pooled, axis=1)

     merged = ops.dense(merged, 1024, self.conf.rate, 'dense1')
     return ops.dense(merged, self.conf.class_num, self.conf.rate, 'dense2')
    def forward_pass(self, state_in, reshape=True, sigmoid_out=False, reuse=None):
        """Build the dense network on `state_in` and return its output tensor.

        state_in    - Input tensor. When `reshape` is True its static shape must have
                      at least 4 dimensions (indices 1..3 are read).
        reshape     - If True, the input and the third layer are reshaped to
                      [-1, d1, d3, d2] (d* taken from their static shapes) and the
                      final output is reshaped to [-1, d1].
        sigmoid_out - If True, tf.nn.sigmoid is applied to the output.
        reuse       - Forwarded to tf.variable_scope.

        Returns the output tensor, which is also stored in self.output.

        The output layer is built whether or not `reshape` is set. Previously it was
        only created inside the `reshape` branch, so calling with reshape=False either
        raised AttributeError or returned a stale self.output from an earlier call.
        """
        self.state_in = state_in

        shape_in = self.state_in.get_shape().as_list()

        with tf.variable_scope(self.scope, reuse=reuse):

            if reshape:
                # Reshape the input to [-1, d1, d3, d2].
                # NOTE(review): this is a reshape, not a transpose, and dims 2 and 3 are
                # swapped in the target shape - confirm this matches the intended layout.
                self.state_in = tf.reshape(self.state_in, [-1, shape_in[1], shape_in[3], shape_in[2]])

            self.layer1 = dense(self.state_in, 16, scope='layer1')

            self.layer2 = dense(self.layer1, 8, scope='layer2')

            self.layer3 = dense(self.layer2, 1, scope='layer3')

            if reshape:
                # Same reshape pattern applied to layer3, using layer3's own static shape
                shape_in = self.layer3.get_shape().as_list()
                self.layer3 = tf.reshape(self.layer3, [-1, shape_in[1], shape_in[3], shape_in[2]])

            # Always create the output layer so self.output is defined for both modes
            self.output = dense(self.layer3, 1, scope='output')

            if sigmoid_out:
                self.output = tf.nn.sigmoid(self.output)

            if reshape:
                # Regroup the output into rows of length d1 (d1 from layer3's static shape)
                self.output = tf.reshape(self.output, [-1, shape_in[1]])

            self.network_params = tf.trainable_variables(scope=self.scope)

        return self.output
Exemple #7
0
    def build_discriminator(self,
                            input,
                            channels=3,
                            ndf=64,
                            norm_type='batch',
                            init_type='normal',
                            init_gain=0.02,
                            is_training=True):
        """
        SRGAN Discriminator

        Applies eight `ops.conv` blocks (scopes 'conv1'..'conv8', filter_size 3,
        LeakyReLU activation, strides alternating 1 and 2), flattens the result
        and applies two `ops.dense` layers: 1024 outputs with LeakyReLU, then a
        single output with a sigmoid activation, which is returned.

        The first conv block uses no normalisation; the remaining seven use
        `norm_type`. Both dense layers use no normalisation.
        """
        # (in_channels, out_channels, stride, norm_type) for conv1 .. conv8
        conv_specs = (
            (channels, ndf, 1, None),
            (ndf, ndf, 2, norm_type),
            (ndf, 2 * ndf, 1, norm_type),
            (2 * ndf, 2 * ndf, 2, norm_type),
            (2 * ndf, 4 * ndf, 1, norm_type),
            (4 * ndf, 4 * ndf, 2, norm_type),
            (4 * ndf, 8 * ndf, 1, norm_type),
            (8 * ndf, 8 * ndf, 2, norm_type),
        )

        features = input
        for index, (n_in, n_out, stride, norm) in enumerate(conv_specs, start=1):
            features = ops.conv(features,
                                in_channels=n_in,
                                out_channels=n_out,
                                filter_size=3,
                                stride=stride,
                                weight_init_type=init_type,
                                weight_init_gain=init_gain,
                                norm_type=norm,
                                activation_type='LeakyReLU',
                                is_training=is_training,
                                scope='conv%d' % index,
                                reuse=self.reuse)

        flat = ops.flatten(features)

        # The dense input size is read from the flattened tensor's static shape
        hidden = ops.dense(flat,
                           in_size=flat.get_shape().as_list()[1],
                           out_size=1024,
                           weight_init_type=init_type,
                           weight_init_gain=init_gain,
                           norm_type=None,
                           activation_type='LeakyReLU',
                           is_training=is_training,
                           scope='dense',
                           reuse=self.reuse)

        return ops.dense(hidden,
                         in_size=1024,
                         out_size=1,
                         weight_init_type=init_type,
                         weight_init_gain=init_gain,
                         norm_type=None,
                         activation_type='sigmoid',
                         is_training=is_training,
                         scope='output',
                         reuse=self.reuse)
Exemple #8
0
	def __init__(self, state, action, noise, state_dims, action_dims, noise_dims, dense1_size, dense2_size, final_layer_init, num_atoms, v_min, v_max, is_training=False, scope='critic'):
		"""Build a sample-based critic graph: state, action and noise inputs are merged to produce num_atoms output samples per batch row.

		state - State input to pass through the network
		action - Action input for which the output samples should be predicted
		noise - Noise input; reshaped to [256*num_atoms, noise_dims] before use
		state_dims, action_dims, noise_dims - Dimensions; their products are stored on self
		dense1_size, dense2_size - Size arguments passed to the hidden dense layers
		final_layer_init - Bound of the uniform initializer used for the output layer
		num_atoms - Number of samples produced per batch row
		v_min, v_max - Stored on self; not otherwise used in this constructor
		is_training - Stored on self; not otherwise used in this constructor
		scope - Variable scope under which the network is created
		"""

		def fan_in_uniform(fan_in):
			# New uniform initializer over [-1/sqrt(fan_in), 1/sqrt(fan_in)]
			return tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(fan_in))), 1/tf.sqrt(tf.to_float(fan_in)))

		def final_uniform():
			# New uniform initializer over [-final_layer_init, final_layer_init]
			return tf.random_uniform_initializer(-1*final_layer_init, final_layer_init)

		self.scope = scope
		self.is_training = is_training
		self.state = state
		self.action = action
		self.noise = noise
		# Products are used as fan_in values (e.g. state_dims of (3,2) gives 6)
		self.state_dims = np.prod(state_dims)
		self.action_dims = np.prod(action_dims)
		self.noise_dims = np.prod(noise_dims)
		self.num_atoms = num_atoms
		self.v_min = v_min
		self.v_max = v_max

		# Hard-coded batch size: every reshape below depends on it
		batch_size = 256

		with tf.variable_scope(self.scope):
			self.dense1_mul = dense(self.state, dense1_size, weight_init=fan_in_uniform(self.state_dims),
								bias_init=fan_in_uniform(self.state_dims), scope='dense1')
			self.dense1 = relu(self.dense1_mul, scope='dense1')

			# State and action branches: one row per batch element, shaped [batch, 1, dense2_size]
			state_branch = dense(self.dense1, dense2_size, weight_init=fan_in_uniform(dense1_size+self.action_dims),
								bias_init=fan_in_uniform(dense1_size+self.action_dims), scope='dense2a')
			self.dense2a = tf.reshape(state_branch, [batch_size, 1 , dense2_size])

			action_branch = dense(self.action, dense2_size, weight_init=fan_in_uniform(dense1_size+self.action_dims),
								bias_init=fan_in_uniform(dense1_size+self.action_dims), scope='dense2b')
			self.dense2b = tf.reshape(action_branch, [batch_size, 1 , dense2_size])

			# Noise branch: one row per (batch element, atom), shaped [batch, num_atoms, dense2_size]
			self.noise = tf.reshape(self.noise, [batch_size*num_atoms , noise_dims])

			# TODO (from the original author): check whether this layer needs to be modified
			noise_branch = dense(self.noise, dense2_size, weight_init=fan_in_uniform(dense1_size+self.noise_dims),
								bias_init=fan_in_uniform(dense1_size+self.noise_dims), scope='dense2c')
			self.dense2c = tf.reshape(noise_branch, [batch_size, num_atoms , dense2_size])

			# The size-1 axis of the state/action branches broadcasts against the atom axis of the noise branch
			merged = relu(self.dense2a + self.dense2b + self.dense2c, scope='dense2')
			self.dense2 = tf.reshape(merged, [batch_size*num_atoms, dense2_size])

			self.output_mul = dense(self.dense2, 1, weight_init=final_uniform(),
									   bias_init=final_uniform(), scope='output')

			# The raw layer output is used as the samples (no tanh rescaling to [v_min, v_max] is applied here)
			self.output_samples = tf.reshape(self.output_mul, [batch_size, num_atoms])

			self.network_params = tf.trainable_variables(scope=self.scope)
			# Global variables in this scope whose name contains 'batch_normalization/moving'
			self.bn_params = [var for var in tf.global_variables(scope=self.scope) if 'batch_normalization/moving' in var.name]

			self.CVaR_value = CVaR_sample(self.output_samples,train_params.CVaR_alpha,train_params.CVaR_optimizing)
			# The Q value is the mean of the generated samples
			self.Q_val = tf.reduce_mean(self.output_samples, axis=1)

			# TODO add utility function HERE!
			# Gradient of the CVaR value wrt the action input - used to train actor network
			self.action_grads = tf.gradients(self.CVaR_value, self.action)
Exemple #9
0
	def __init__(self, state, action, noise,state_dims, action_dims, noise_dims, dense1_size, dense2_size, final_layer_init, num_atoms, v_min, v_max, scope='critic'):
		"""Build a sample-based critic graph whose num_atoms output samples per batch row are tanh-rescaled using v_min and v_max.

		state - State input to pass through the network
		action - Action input for which the output samples should be predicted
		noise - Noise input; reshaped to [256*num_atoms, noise_dims] before use
		state_dims, action_dims, noise_dims - Dimensions; their products are stored on self
		dense1_size, dense2_size - Size arguments passed to the hidden dense layers
		final_layer_init - Bound of the uniform initializer used for the output layer
		num_atoms - Number of samples produced per batch row
		v_min, v_max - Values used to rescale the tanh output
		scope - Variable scope under which the network is created
		"""

		def fan_in_uniform(fan_in):
			# New uniform initializer over [-1/sqrt(fan_in), 1/sqrt(fan_in)]
			return tf.random_uniform_initializer((-1/tf.sqrt(tf.to_float(fan_in))), 1/tf.sqrt(tf.to_float(fan_in)))

		def final_uniform():
			# New uniform initializer over [-final_layer_init, final_layer_init]
			return tf.random_uniform_initializer(-1*final_layer_init, final_layer_init)

		self.scope = scope
		self.state = state
		self.action = action
		self.noise = noise
		# Products are used as fan_in values (e.g. state_dims of (3,2) gives 6)
		self.state_dims = np.prod(state_dims)
		self.action_dims = np.prod(action_dims)
		self.noise_dims = np.prod(noise_dims)
		self.v_min = v_min
		self.v_max = v_max

		# Hard-coded batch size: every reshape below depends on it
		batch_size = 256

		with tf.variable_scope(self.scope):
			self.dense1_mul = dense(self.state, dense1_size, weight_init=fan_in_uniform(self.state_dims),
								bias_init=fan_in_uniform(self.state_dims), scope='dense1')
			self.dense1 = relu(self.dense1_mul, scope='dense1')

			# State and action branches: one row per batch element, shaped [batch, 1, dense2_size]
			state_branch = dense(self.dense1, dense2_size, weight_init=fan_in_uniform(dense1_size+self.action_dims),
								bias_init=fan_in_uniform(dense1_size+self.action_dims), scope='dense2a')
			self.dense2a = tf.reshape(state_branch, [batch_size, 1 , dense2_size])

			action_branch = dense(self.action, dense2_size, weight_init=fan_in_uniform(dense1_size+self.action_dims),
								bias_init=fan_in_uniform(dense1_size+self.action_dims), scope='dense2b')
			self.dense2b = tf.reshape(action_branch, [batch_size, 1 , dense2_size])

			# Noise branch: one row per (batch element, atom), shaped [batch, num_atoms, dense2_size]
			self.noise = tf.reshape(self.noise, [batch_size*num_atoms , noise_dims])
			noise_branch = dense(self.noise, dense2_size, weight_init=fan_in_uniform(dense1_size+self.noise_dims),
								bias_init=fan_in_uniform(dense1_size+self.noise_dims), scope='dense2c')
			self.dense2c = tf.reshape(noise_branch, [batch_size, num_atoms , dense2_size])

			# The size-1 axis of the state/action branches broadcasts against the atom axis of the noise branch
			merged = relu(self.dense2a + self.dense2b + self.dense2c, scope='dense2')
			self.dense2 = tf.reshape(merged, [batch_size*num_atoms, dense2_size])

			self.output_mul = dense(self.dense2, 1, weight_init=final_uniform(),
									   bias_init=final_uniform(), scope='output')

			# The layer output is scaled by 0.2 before the tanh
			self.output_tanh = tanh(0.2*self.output_mul, scope='output')

			# samples = 0.5 * (tanh * (v_max - v_min) + (v_max + v_min))
			stretched = tf.multiply(self.output_tanh, (self.v_max - self.v_min))
			rescaled = tf.multiply(0.5, stretched + (self.v_max + self.v_min))
			self.output_samples = tf.reshape(rescaled, [batch_size, num_atoms])

			self.network_params = tf.trainable_variables(scope=self.scope)
			self.bn_params = [] # No batch norm params

			# The Q value is the mean of the generated samples
			self.Q_val = tf.reduce_mean(self.output_samples, axis=1)

			# Gradient wrt the action input of the samples divided by num_atoms (tf.gradients sums over
			# all elements of its first argument) - used to train actor network
			self.action_grads = tf.gradients(self.output_samples/num_atoms, self.action)
Exemple #10
0
    def __init__(self,
                 state,
                 action,
                 state_dims,
                 action_dims,
                 args,
                 is_training=False,
                 scope='critic'):
        """Build the batch-normalised critic graph with a single-value output.

        state       - State input to pass through the network
        action      - Action input for which the Q value should be predicted
        state_dims  - State dimensions; only their product is stored
        action_dims - Action dimensions; only their product is stored
        args        - Object providing dense1_size, dense2_size and final_layer_init
        is_training - Flag forwarded to every batchnorm call
        scope       - Variable scope under which the network is created
        """

        def fan_in_uniform(fan_in):
            # New uniform initializer over [-1/sqrt(fan_in), 1/sqrt(fan_in)]
            return tf.random_uniform_initializer(
                (-1 / tf.sqrt(tf.to_float(fan_in))),
                1 / tf.sqrt(tf.to_float(fan_in)))

        self.scope = scope
        self.args = args
        self.is_training = is_training
        self.state = state
        self.action = action
        # Products are used as fan_in values (e.g. state_dims of (3,2) gives 6)
        self.state_dims = np.prod(state_dims)
        self.action_dims = np.prod(action_dims)

        # Network hyper-parameters come from args
        dense1_size = self.args.dense1_size
        dense2_size = self.args.dense2_size
        final_layer_init = self.args.final_layer_init

        def final_uniform():
            # New uniform initializer over [-final_layer_init, final_layer_init]
            return tf.random_uniform_initializer(-1 * final_layer_init,
                                                 final_layer_init)

        with tf.variable_scope(self.scope):
            # Normalise the raw state before the first layer
            self.input_norm = batchnorm(self.state, self.is_training, scope='input_norm')

            # First layer: dense -> batchnorm -> relu
            self.dense1_mul = dense(self.input_norm,
                                    dense1_size,
                                    weight_init=fan_in_uniform(self.state_dims),
                                    bias_init=fan_in_uniform(self.state_dims),
                                    scope='dense1')
            self.dense1_bn = batchnorm(self.dense1_mul, self.is_training, scope='dense1')
            self.dense1 = relu(self.dense1_bn, scope='dense1')

            # Second layer: separate dense projections of the first layer and
            # of the action are summed before the relu
            self.dense2a = dense(
                self.dense1,
                dense2_size,
                weight_init=fan_in_uniform(dense1_size + self.action_dims),
                bias_init=fan_in_uniform(dense1_size + self.action_dims),
                scope='dense2a')
            self.dense2b = dense(
                self.action,
                dense2_size,
                weight_init=fan_in_uniform(dense1_size + self.action_dims),
                bias_init=fan_in_uniform(dense1_size + self.action_dims),
                scope='dense2b')
            self.dense2 = relu(self.dense2a + self.dense2b, scope='dense2')

            # Single-unit output layer
            self.output = dense(self.dense2,
                                1,
                                weight_init=final_uniform(),
                                bias_init=final_uniform(),
                                scope='output')

            self.network_params = tf.trainable_variables(scope=self.scope)

            # Gradient of value output wrt action input - used to train actor network
            self.action_grads = tf.gradients(self.output, self.action)
Exemple #11
0
 def build_bottom_block(self, inputs, name):
     """Flatten `inputs`, then apply two `ops.dense` layers and return the result.

     The first dense call gets size argument 4096, the second
     `self.conf.class_num`. Every scope name is prefixed with `name`.
     """
     flat = tf.contrib.layers.flatten(inputs, scope=name + '/flat')
     hidden = ops.dense(flat, 4096, name + '/dense1')
     return ops.dense(hidden, self.conf.class_num, name + '/dense2')
Exemple #12
0
    def forward_pass(self,
                     state_in,
                     reshape=True,
                     sigmoid_out=False,
                     reuse=None):
        self.state_in = state_in

        shape_in = self.state_in.get_shape().as_list()

        # Get number of input channels for weight/bias init
        channels_in = shape_in[-1]

        with tf.variable_scope(self.scope, reuse=reuse):

            if reshape:
                # Reshape [batch_size, traj_len, H, W, C] into [batch_size*traj_len, H, W, C]
                self.state_in = tf.reshape(
                    self.state_in, [-1, shape_in[2], shape_in[3], shape_in[4]])

            self.conv1 = conv2d(
                self.state_in,
                self.num_filters,
                self.kernels[0],
                self.strides[0],
                kernel_init=tf.random_uniform_initializer((-1.0 / tf.sqrt(
                    float(channels_in * self.kernels[0] * self.kernels[0]))), (
                        1.0 / tf.sqrt(
                            float(channels_in * self.kernels[0] *
                                  self.kernels[0])))),
                bias_init=tf.random_uniform_initializer((-1.0 / tf.sqrt(
                    float(channels_in * self.kernels[0] * self.kernels[0]))), (
                        1.0 / tf.sqrt(
                            float(channels_in * self.kernels[0] *
                                  self.kernels[0])))),
                scope='conv1')

            self.conv1 = lrelu(self.conv1, self.lrelu_alpha, scope='conv1')

            self.conv2 = conv2d(
                self.conv1,
                self.num_filters,
                self.kernels[1],
                self.strides[1],
                kernel_init=tf.random_uniform_initializer((-1.0 / tf.sqrt(
                    float(self.num_filters * self.kernels[1] *
                          self.kernels[1]))), (1.0 / tf.sqrt(
                              float(self.num_filters * self.kernels[1] *
                                    self.kernels[1])))),
                bias_init=tf.random_uniform_initializer((-1.0 / tf.sqrt(
                    float(self.num_filters * self.kernels[1] *
                          self.kernels[1]))), (1.0 / tf.sqrt(
                              float(self.num_filters * self.kernels[1] *
                                    self.kernels[1])))),
                scope='conv2')

            self.conv2 = lrelu(self.conv2, self.lrelu_alpha, scope='conv2')

            self.conv3 = conv2d(
                self.conv2,
                self.num_filters,
                self.kernels[2],
                self.strides[2],
                kernel_init=tf.random_uniform_initializer((-1.0 / tf.sqrt(
                    float(self.num_filters * self.kernels[2] *
                          self.kernels[2]))), (1.0 / tf.sqrt(
                              float(self.num_filters * self.kernels[2] *
                                    self.kernels[2])))),
                bias_init=tf.random_uniform_initializer((-1.0 / tf.sqrt(
                    float(self.num_filters * self.kernels[2] *
                          self.kernels[2]))), (1.0 / tf.sqrt(
                              float(self.num_filters * self.kernels[2] *
                                    self.kernels[2])))),
                scope='conv3')

            self.conv3 = lrelu(self.conv3, self.lrelu_alpha, scope='conv3')

            self.conv4 = conv2d(
                self.conv3,
                self.num_filters,
                self.kernels[3],
                self.strides[3],
                kernel_init=tf.random_uniform_initializer((-1.0 / tf.sqrt(
                    float(self.num_filters * self.kernels[3] *
                          self.kernels[3]))), (1.0 / tf.sqrt(
                              float(self.num_filters * self.kernels[3] *
                                    self.kernels[3])))),
                bias_init=tf.random_uniform_initializer((-1.0 / tf.sqrt(
                    float(self.num_filters * self.kernels[3] *
                          self.kernels[3]))), (1.0 / tf.sqrt(
                              float(self.num_filters * self.kernels[3] *
                                    self.kernels[3])))),
                scope='conv4')

            self.conv4 = lrelu(self.conv4, self.lrelu_alpha, scope='conv4')

            self.flatten = flatten(self.conv4)

            self.dense = dense(self.flatten,
                               self.dense_size,
                               kernel_init=tf.random_uniform_initializer(
                                   (-1.0 / tf.sqrt(float(self.num_filters))),
                                   (1.0 / tf.sqrt(float(self.num_filters)))),
                               bias_init=tf.random_uniform_initializer(
                                   (-1.0 / tf.sqrt(float(self.num_filters))),
                                   (1.0 / tf.sqrt(float(self.num_filters)))))

            self.output = dense(self.dense,
                                1,
                                kernel_init=tf.random_uniform_initializer(
                                    (-1.0 / tf.sqrt(float(self.dense_size))),
                                    (1.0 / tf.sqrt(float(self.dense_size)))),
                                bias_init=tf.random_uniform_initializer(
                                    (-1.0 / tf.sqrt(float(self.dense_size))),
                                    (1.0 / tf.sqrt(float(self.dense_size)))),
                                scope='output')

            if sigmoid_out:
                self.output = tf.nn.sigmoid(self.output)

            if reshape:
                # Reshape 1d reward output [batch_size*traj_len] into batches [batch_size, traj_len]
                self.output = tf.reshape(self.output, [-1, shape_in[1]])

            self.network_params = tf.trainable_variables(scope=self.scope)

        return self.output
    def __init__(self,
                 state,
                 audio,
                 state_dims,
                 action_dims,
                 action_bound_low,
                 action_bound_high,
                 args,
                 is_training=False,
                 scope='actor'):
        """Build the actor graph that maps (state, audio) to a bounded action.

        The audio input goes through two 50-unit ReLU layers and is
        concatenated along axis 1 with ``createNetwork_cnn(state)``. The joint
        features are batch-normalised, passed through two
        dense -> batchnorm -> relu stages and a tanh output layer; the tanh
        result in [-1, 1] is then rescaled linearly so that -1 maps to
        ``action_bound_low`` and +1 maps to ``action_bound_high``.

        Args:
            state: State input; handed to ``createNetwork_cnn``.
            audio: Audio input; handed to ``tf.layers.dense``.
            state_dims: Dimensions of the state. Their product is stored in
                ``self.state_dims`` and is only used as the fan-in of the
                first dense layer when the width of the concatenated
                features is not statically known.
            action_dims: Dimensions of the action; their product is the
                number of units of the output layer.
            action_bound_low: Lower bound of the action space.
            action_bound_high: Upper bound of the action space.
            args: Object exposing ``dense1_size``, ``dense2_size`` and
                ``final_layer_init``.
            is_training: Forwarded to every ``batchnorm`` call.
            scope: Name of the variable scope the network is built in; also
                used to collect ``self.network_params``.
        """
        self.state = state
        self.audio = audio
        # Product of the dims, e.g. state_dims == (3, 2) gives 6.
        self.state_dims = np.prod(state_dims)
        self.action_dims = np.prod(action_dims)
        self.action_bound_low = action_bound_low
        self.action_bound_high = action_bound_high
        self.args = args
        self.is_training = is_training
        self.scope = scope

        # Network params
        dense1_size = self.args.dense1_size
        dense2_size = self.args.dense2_size
        final_layer_init = self.args.final_layer_init

        def fan_in_init(fan_in):
            # Uniform initializer over [-1/sqrt(fan_in), 1/sqrt(fan_in)].
            bound = 1 / tf.sqrt(tf.to_float(fan_in))
            return tf.random_uniform_initializer(-bound, bound)

        with tf.variable_scope(self.scope):
            self.fc1_a = tf.layers.dense(self.audio, 50, tf.nn.relu)
            self.fc2_a = tf.layers.dense(self.fc1_a, 50, tf.nn.relu)
            self.final_flat = tf.concat(
                [createNetwork_cnn(self.state), self.fc2_a], 1)

            # dense1 consumes the concatenated features, not the raw state,
            # so its fan-in is the width of final_flat rather than
            # prod(state_dims). Fall back to the old value when the static
            # shape does not reveal the width.
            # NOTE(review): assumes final_flat is [batch, features], so the
            # last axis is the feature axis - confirm against
            # createNetwork_cnn.
            flat_shape = self.final_flat.get_shape()
            flat_width = None
            if flat_shape.ndims is not None:
                flat_width = flat_shape.as_list()[-1]
            if flat_width is not None:
                dense1_fan_in = flat_width
            else:
                dense1_fan_in = self.state_dims

            self.input_norm = batchnorm(self.final_flat,
                                        self.is_training,
                                        scope='input_norm')

            self.dense1_mul = dense(self.input_norm,
                                    dense1_size,
                                    weight_init=fan_in_init(dense1_fan_in),
                                    bias_init=fan_in_init(dense1_fan_in),
                                    scope='dense1')

            self.dense1_bn = batchnorm(self.dense1_mul,
                                       self.is_training,
                                       scope='dense1')

            self.dense1 = relu(self.dense1_bn, scope='dense1')

            # dense2 is fed by dense1, so its fan-in is dense1_size.
            self.dense2_mul = dense(self.dense1,
                                    dense2_size,
                                    weight_init=fan_in_init(dense1_size),
                                    bias_init=fan_in_init(dense1_size),
                                    scope='dense2')

            self.dense2_bn = batchnorm(self.dense2_mul,
                                       self.is_training,
                                       scope='dense2')

            self.dense2 = relu(self.dense2_bn, scope='dense2')

            # The output layer uses the fixed range
            # [-final_layer_init, final_layer_init] instead of a fan-in bound.
            self.output_mul = dense(self.dense2,
                                    self.action_dims,
                                    weight_init=tf.random_uniform_initializer(
                                        -1 * final_layer_init,
                                        final_layer_init),
                                    bias_init=tf.random_uniform_initializer(
                                        -1 * final_layer_init,
                                        final_layer_init),
                                    scope='output')

            self.output_tanh = tanh(self.output_mul, scope='output')

            # Scale tanh output to lower and upper action bounds:
            # 0.5 * (tanh * (high - low) + (high + low)).
            self.output = tf.multiply(
                0.5,
                tf.multiply(self.output_tanh,
                            (self.action_bound_high - self.action_bound_low)) +
                (self.action_bound_high + self.action_bound_low))

            self.network_params = tf.trainable_variables(scope=self.scope)