def __init__(self, state, state_dims, action_dims, action_bound_low,
             action_bound_high, dense1_size, dense2_size, final_layer_init,
             scope='actor'):
    """Build the actor graph: two dense+relu layers followed by a tanh output.

    The tanh output lies in [-1, 1] and is rescaled so that -1 maps to
    action_bound_low and +1 maps to action_bound_high.

    Args:
        state: State input to pass through the network.
        state_dims: Dimensions of the state; their product is used as the
            fan-in of the first layer (e.g. (3, 2) gives a fan-in of 6).
        action_dims: Dimensions of the action; their product is the number
            of output units.
        action_bound_low: Lower bound of the action space.
        action_bound_high: Upper bound of the action space.
        dense1_size: Number of units in the first dense layer.
        dense2_size: Number of units in the second dense layer.
        final_layer_init: Half-width of the uniform range used to
            initialise the output layer's weights and biases.
        scope: Variable scope under which the network is created.
    """
    self.state = state
    self.state_dims = np.prod(state_dims)
    self.action_dims = np.prod(action_dims)
    self.action_bound_low = action_bound_low
    self.action_bound_high = action_bound_high
    self.scope = scope

    def fan_in_uniform(fan_in):
        # Uniform initialiser over [-1/sqrt(fan_in), 1/sqrt(fan_in)].
        # Both bounds are built separately, in this order, so the ops added
        # to the graph match the fully inlined expression.
        lower = -1/tf.sqrt(tf.to_float(fan_in))
        upper = 1/tf.sqrt(tf.to_float(fan_in))
        return tf.random_uniform_initializer(lower, upper)

    def final_uniform():
        # Uniform initialiser over [-final_layer_init, final_layer_init].
        return tf.random_uniform_initializer(-1*final_layer_init, final_layer_init)

    with tf.variable_scope(self.scope):
        # First hidden layer: fan-in is the flattened state size.
        self.dense1_mul = dense(
            self.state,
            dense1_size,
            weight_init=fan_in_uniform(self.state_dims),
            bias_init=fan_in_uniform(self.state_dims),
            scope='dense1',
        )
        self.dense1 = relu(self.dense1_mul, scope='dense1')

        # Second hidden layer: fan-in is the width of the first layer.
        self.dense2_mul = dense(
            self.dense1,
            dense2_size,
            weight_init=fan_in_uniform(dense1_size),
            bias_init=fan_in_uniform(dense1_size),
            scope='dense2',
        )
        self.dense2 = relu(self.dense2_mul, scope='dense2')

        # Output layer, squashed by tanh.
        self.output_mul = dense(
            self.dense2,
            self.action_dims,
            weight_init=final_uniform(),
            bias_init=final_uniform(),
            scope='output',
        )
        self.output_tanh = tanh(self.output_mul, scope='output')

        # Scale tanh output to lower and upper action bounds.
        stretched = tf.multiply(
            self.output_tanh,
            (self.action_bound_high - self.action_bound_low),
        )
        recentred = stretched + (self.action_bound_high + self.action_bound_low)
        self.output = tf.multiply(0.5, recentred)

        self.network_params = tf.trainable_variables(scope=self.scope)
        self.bn_params = []  # No batch norm params
def __init__(self, state, action, noise, state_dims, action_dims, noise_dims,
             dense1_size, dense2_size, final_layer_init, num_atoms, v_min,
             v_max, scope='critic'):
    """Build the sample-based critic graph.

    The state passes through one dense+relu layer. Its projection is then
    summed with projections of the action and of the noise (the state and
    action projections are broadcast across the num_atoms noise samples),
    and the result goes through relu and a scalar tanh output layer. This
    yields num_atoms return samples per batch element, rescaled so the
    tanh range [-1, 1] maps to [v_min, v_max].

    The batch dimension is inferred at reshape time (``-1``) rather than
    fixed to a constant, so the same graph works for any batch size.

    Args:
        state: State input to pass through the network.
        action: Action input for which the Z distribution should be
            predicted.
        noise: Noise input; it is flattened to rows of width
            prod(noise_dims), and num_atoms consecutive rows belong to one
            batch element.
        state_dims: Dimensions of the state; their product is used as the
            fan-in of the first layer (e.g. (3, 2) gives a fan-in of 6).
        action_dims: Dimensions of the action.
        noise_dims: Dimensions of a single noise vector.
        dense1_size: Number of units in the first dense layer.
        dense2_size: Number of units in the merged second layer.
        final_layer_init: Half-width of the uniform range used to
            initialise the output layer's weights and biases.
        num_atoms: Number of return samples produced per batch element.
        v_min: Lower bound of the output samples.
        v_max: Upper bound of the output samples.
        scope: Variable scope under which the network is created.
    """
    self.state = state
    self.action = action
    self.noise = noise
    self.noise_dims = np.prod(noise_dims)
    self.state_dims = np.prod(state_dims)
    self.action_dims = np.prod(action_dims)
    self.v_min = v_min
    self.v_max = v_max
    self.scope = scope

    def fan_in_uniform(fan_in):
        # Uniform initialiser over [-1/sqrt(fan_in), 1/sqrt(fan_in)].
        lower = -1/tf.sqrt(tf.to_float(fan_in))
        upper = 1/tf.sqrt(tf.to_float(fan_in))
        return tf.random_uniform_initializer(lower, upper)

    def final_uniform():
        # Uniform initialiser over [-final_layer_init, final_layer_init].
        return tf.random_uniform_initializer(-1*final_layer_init, final_layer_init)

    with tf.variable_scope(self.scope):
        self.dense1_mul = dense(
            self.state,
            dense1_size,
            weight_init=fan_in_uniform(self.state_dims),
            bias_init=fan_in_uniform(self.state_dims),
            scope='dense1',
        )
        self.dense1 = relu(self.dense1_mul, scope='dense1')

        # Merge first dense layer with action and noise input to get the
        # second dense layer. Each branch is projected to dense2_size and
        # the three projections are summed before the non-linearity.
        action_fan_in = dense1_size + self.action_dims
        noise_fan_in = dense1_size + self.noise_dims

        self.dense2a = dense(
            self.dense1,
            dense2_size,
            weight_init=fan_in_uniform(action_fan_in),
            bias_init=fan_in_uniform(action_fan_in),
            scope='dense2a',
        )
        # [batch, 1, dense2_size]: the singleton axis broadcasts over atoms.
        self.dense2a = tf.reshape(self.dense2a, [-1, 1, dense2_size])

        self.dense2b = dense(
            self.action,
            dense2_size,
            weight_init=fan_in_uniform(action_fan_in),
            bias_init=fan_in_uniform(action_fan_in),
            scope='dense2b',
        )
        self.dense2b = tf.reshape(self.dense2b, [-1, 1, dense2_size])

        # One row per (batch element, atom) pair. The flattened noise size
        # is used so that tuple-valued noise_dims work as well as ints.
        self.noise = tf.reshape(self.noise, [-1, int(self.noise_dims)])
        self.dense2c = dense(
            self.noise,
            dense2_size,
            weight_init=fan_in_uniform(noise_fan_in),
            bias_init=fan_in_uniform(noise_fan_in),
            scope='dense2c',
        )
        self.dense2c = tf.reshape(self.dense2c, [-1, num_atoms, dense2_size])

        self.dense2 = relu(self.dense2a + self.dense2b + self.dense2c, scope='dense2')
        # Back to one row per (batch element, atom) pair for the output layer.
        self.dense2 = tf.reshape(self.dense2, [-1, dense2_size])

        self.output_mul = dense(
            self.dense2,
            1,
            weight_init=final_uniform(),
            bias_init=final_uniform(),
            scope='output',
        )
        self.output_tanh = tanh(0.2*self.output_mul, scope='output')

        # Scale tanh output to the [v_min, v_max] range.
        self.output_samples = tf.multiply(
            0.5,
            tf.multiply(self.output_tanh, (self.v_max - self.v_min))
            + (self.v_max + self.v_min),
        )
        self.output_samples = tf.reshape(self.output_samples, [-1, num_atoms])

        self.network_params = tf.trainable_variables(scope=self.scope)
        self.bn_params = []  # No batch norm params

        # The Q value is the mean of the generated samples.
        self.Q_val = tf.reduce_mean(self.output_samples, axis=1)

        # Gradient of the mean of the output samples wrt the action input,
        # used to train the actor network. Dividing by num_atoms before
        # differentiating makes the summed gradient equal the gradient of
        # the per-element mean.
        self.action_grads = tf.gradients(self.output_samples/num_atoms, self.action)
def generator():
    """
    Generator built on freshly created input placeholders.

    The target angles are tiled over the spatial dimensions of the input
    image and concatenated to it as extra channels before the
    convolutional stack (input layer, 2 strided encoder layers, 6 residual
    bottleneck blocks, 2 deconvolution layers, output layer) is applied.

    Returns
    -------
    x: tensor, generated image (output of the final tanh).
    angles: placeholder, float32 of shape [None, 2], target gaze direction.
    input_: placeholder, float32 of shape [None, 64, 64, 3], input images.

    """
    image_size = 64
    style_dim = 2
    n_filters = 64

    # The angles placeholder is created first, then the image placeholder.
    angles = tf.compat.v1.placeholder(tf.float32, shape=[None, style_dim])
    input_ = tf.compat.v1.placeholder(
        tf.float32, shape=[None, image_size, image_size, 3]
    )

    # Spread the angles over every pixel and append them as channels.
    angle_map = tf.reshape(angles, [-1, 1, 1, style_dim])
    height = tf.shape(input_)[1]
    width = tf.shape(input_)[2]
    angle_map = tf.tile(angle_map, [1, height, width, 1])
    x = tf.concat([input_, angle_map], axis=3)

    with tf.compat.v1.variable_scope("generator", reuse=False):

        # input layer
        x = conv2d(
            x,
            n_filters,
            conv_filters_dim=7,
            d_h=1,
            d_w=1,
            pad=3,
            use_bias=False,
            scope="conv2d_input",
        )
        x = instance_norm(x, scope="in_input")
        x = relu(x)

        # encoder: each step halves the resolution and doubles the filters
        for i in range(2):
            n_filters *= 2
            x = conv2d(
                x,
                n_filters,
                conv_filters_dim=4,
                d_h=2,
                d_w=2,
                pad=1,
                use_bias=False,
                scope="conv2d_%d" % i,
            )
            x = instance_norm(x, scope="in_conv_%d" % i)
            x = relu(x)

        # bottleneck: residual blocks at constant width
        for i in range(6):
            branch = conv2d(
                x,
                n_filters,
                conv_filters_dim=3,
                d_h=1,
                d_w=1,
                pad=1,
                use_bias=False,
                scope="conv_res_a_%d" % i,
            )
            branch = instance_norm(branch, "in_res_a_%d" % i)
            branch = relu(branch)

            branch = conv2d(
                branch,
                n_filters,
                conv_filters_dim=3,
                d_h=1,
                d_w=1,
                pad=1,
                use_bias=False,
                scope="conv_res_b_%d" % i,
            )
            branch = instance_norm(branch, "in_res_b_%d" % i)

            x = x + branch

        # decoder: each step halves the filters
        for i in range(2):
            n_filters //= 2
            x = deconv2d(
                x,
                n_filters,
                conv_filters_dim=4,
                d_h=2,
                d_w=2,
                use_bias=False,
                scope="deconv_%d" % i,
            )
            x = instance_norm(x, scope="in_decon_%d" % i)
            x = relu(x)

        # output layer: back to 3 channels, squashed by tanh
        x = conv2d(
            x,
            3,
            conv_filters_dim=7,
            d_h=1,
            d_w=1,
            pad=3,
            use_bias=False,
            scope="output",
        )
        x = tanh(x)

    return x, angles, input_
def generator(input_, angles, reuse=False):
    """
    Generator.

    The target angles are tiled over the spatial dimensions of the input
    images and concatenated to them as extra channels; the result is passed
    through an input convolution, 2 strided encoder layers, 6 residual
    bottleneck blocks, 2 deconvolution layers and a tanh output layer.

    Parameters
    ----------
    input_: tensor, input images.
    angles: tensor, target gaze direction.
    reuse: bool, reuse the net if True.

    Returns
    -------
    x: tensor, generated image.

    """
    n_filters = 64
    # Width of the conditioning vector, read from the static shape.
    style_dim = angles.get_shape().as_list()[-1]

    # Spread the angles over every pixel and append them as channels.
    angle_map = tf.reshape(angles, [-1, 1, 1, style_dim])
    height = tf.shape(input_)[1]
    width = tf.shape(input_)[2]
    angle_map = tf.tile(angle_map, [1, height, width, 1])
    x = tf.concat([input_, angle_map], axis=3)

    with tf.compat.v1.variable_scope('generator', reuse=reuse):

        # input layer
        x = conv2d(x, n_filters, conv_filters_dim=7, d_h=1, d_w=1, pad=3,
                   use_bias=False, scope='conv2d_input')
        x = instance_norm(x, scope='in_input')
        x = relu(x)

        # encoder: each step doubles the number of filters
        for i in range(2):
            n_filters *= 2
            x = conv2d(x, n_filters, conv_filters_dim=4, d_h=2, d_w=2, pad=1,
                       use_bias=False, scope='conv2d_%d' % i)
            x = instance_norm(x, scope='in_conv_%d' % i)
            x = relu(x)

        # bottleneck: residual blocks at constant width
        for i in range(6):
            residual = conv2d(x, n_filters, conv_filters_dim=3, d_h=1, d_w=1,
                              pad=1, use_bias=False,
                              scope='conv_res_a_%d' % i)
            residual = instance_norm(residual, 'in_res_a_%d' % i)
            residual = relu(residual)

            residual = conv2d(residual, n_filters, conv_filters_dim=3, d_h=1,
                              d_w=1, pad=1, use_bias=False,
                              scope='conv_res_b_%d' % i)
            residual = instance_norm(residual, 'in_res_b_%d' % i)

            x = x + residual

        # decoder: each step halves the number of filters
        for i in range(2):
            n_filters //= 2
            x = deconv2d(x, n_filters, conv_filters_dim=4, d_h=2, d_w=2,
                         use_bias=False, scope='deconv_%d' % i)
            x = instance_norm(x, scope='in_decon_%d' % i)
            x = relu(x)

        # output layer: back to 3 channels, squashed by tanh
        x = conv2d(x, 3, conv_filters_dim=7, d_h=1, d_w=1, pad=3,
                   use_bias=False, scope='output')
        x = tanh(x)

    return x
def __init__(self, state, audio, state_dims, action_dims, action_bound_low,
             action_bound_high, args, is_training=False, scope='actor'):
    """Build the actor graph that fuses CNN state features with audio features.

    The audio input goes through two 50-unit relu layers and is concatenated
    with the output of createNetwork_cnn(state). The fused vector is
    batch-normalised and passed through two dense+batchnorm+relu layers and
    a tanh output layer. The tanh output lies in [-1, 1] and is rescaled so
    that -1 maps to action_bound_low and +1 maps to action_bound_high.

    Args:
        state: State input to pass through the network.
        audio: Audio input to pass through the network.
        state_dims: Dimensions of the state; their product is used as the
            fan-in for the first dense layer's init range (e.g. (3, 2)
            gives 6).
        action_dims: Dimensions of the action; their product is the number
            of output units.
        action_bound_low: Lower bound of the action space.
        action_bound_high: Upper bound of the action space.
        args: Object providing dense1_size, dense2_size and
            final_layer_init attributes.
        is_training: Passed to every batchnorm call.
        scope: Variable scope under which the network is created.
    """
    self.state = state
    self.audio = audio
    self.state_dims = np.prod(state_dims)
    self.action_dims = np.prod(action_dims)
    self.action_bound_low = action_bound_low
    self.action_bound_high = action_bound_high
    self.args = args
    self.is_training = is_training
    self.scope = scope

    # Network params
    dense1_size = self.args.dense1_size
    dense2_size = self.args.dense2_size
    final_layer_init = self.args.final_layer_init

    def fan_in_uniform(fan_in):
        # Uniform initialiser over [-1/sqrt(fan_in), 1/sqrt(fan_in)].
        # Both bounds are built separately, in this order, so the ops added
        # to the graph match the fully inlined expression.
        lower = -1 / tf.sqrt(tf.to_float(fan_in))
        upper = 1 / tf.sqrt(tf.to_float(fan_in))
        return tf.random_uniform_initializer(lower, upper)

    def final_uniform():
        # Uniform initialiser over [-final_layer_init, final_layer_init].
        return tf.random_uniform_initializer(-1 * final_layer_init, final_layer_init)

    with tf.variable_scope(self.scope):
        # Audio branch: two small relu layers.
        self.fc1_a = tf.layers.dense(self.audio, 50, activation=tf.nn.relu)
        self.fc2_a = tf.layers.dense(self.fc1_a, 50, activation=tf.nn.relu)

        # Fuse the CNN features of the state with the audio features.
        cnn_features = createNetwork_cnn(self.state)
        self.final_flat = tf.concat([cnn_features, self.fc2_a], 1)

        self.input_norm = batchnorm(self.final_flat, self.is_training,
                                    scope='input_norm')

        # NOTE(review): this layer consumes the fused CNN/audio features,
        # yet its init range is derived from state_dims rather than from the
        # width of that tensor - confirm this is intended.
        self.dense1_mul = dense(
            self.input_norm,
            dense1_size,
            weight_init=fan_in_uniform(self.state_dims),
            bias_init=fan_in_uniform(self.state_dims),
            scope='dense1',
        )
        self.dense1_bn = batchnorm(self.dense1_mul, self.is_training,
                                   scope='dense1')
        self.dense1 = relu(self.dense1_bn, scope='dense1')

        # Second hidden layer: fan-in is the width of the first layer.
        self.dense2_mul = dense(
            self.dense1,
            dense2_size,
            weight_init=fan_in_uniform(dense1_size),
            bias_init=fan_in_uniform(dense1_size),
            scope='dense2',
        )
        self.dense2_bn = batchnorm(self.dense2_mul, self.is_training,
                                   scope='dense2')
        self.dense2 = relu(self.dense2_bn, scope='dense2')

        # Output layer, squashed by tanh.
        self.output_mul = dense(
            self.dense2,
            self.action_dims,
            weight_init=final_uniform(),
            bias_init=final_uniform(),
            scope='output',
        )
        self.output_tanh = tanh(self.output_mul, scope='output')

        # Scale tanh output to lower and upper action bounds.
        stretched = tf.multiply(
            self.output_tanh,
            (self.action_bound_high - self.action_bound_low),
        )
        recentred = stretched + (self.action_bound_high + self.action_bound_low)
        self.output = tf.multiply(0.5, recentred)

        self.network_params = tf.trainable_variables(scope=self.scope)