def rainbow_network(num_actions, num_atoms, support, network_type, state):
  """The convolutional network used to compute agent's Q-value distributions.

  Args:
    num_actions: int, number of actions.
    num_atoms: int, the number of buckets of the value function distribution.
    support: tf.linspace, the support of the Q-value distribution.
    network_type: namedtuple, collection of expected values to return.
    state: `tf.Tensor`, contains the agent's current state.

  Returns:
    net: _network_type object containing the tensors output by the network.
  """
  # Scaled uniform initializer used for every parameterized layer.
  initializer = layers.variance_scaling_initializer(
      factor=1.0 / np.sqrt(3.0), mode='FAN_IN', uniform=True)
  # Rescale byte-valued pixels into [0, 1].
  net = tf.cast(state, tf.float32)
  net = tf.div(net, 255.)
  # Nature-DQN convolutional torso: (filters, kernel, stride) per stage.
  for depth, kernel, stride in ((32, [8, 8], 4),
                                (64, [4, 4], 2),
                                (64, [3, 3], 1)):
    net = layers.conv2d(net, depth, kernel, stride=stride,
                        weights_initializer=initializer)
  net = layers.flatten(net)
  net = layers.fully_connected(net, 512, weights_initializer=initializer)
  net = layers.fully_connected(net, num_actions * num_atoms,
                               activation_fn=None,
                               weights_initializer=initializer)
  # One categorical distribution over `num_atoms` buckets per action.
  logits = tf.reshape(net, [-1, num_actions, num_atoms])
  probabilities = layers.softmax(logits)
  # Q-value = expectation of the support under each action's distribution.
  q_values = tf.reduce_sum(support * probabilities, axis=2)
  return network_type(q_values, logits, probabilities)
def nature_dqn_network(num_actions, network_type, state):
  """The convolutional network used to compute the agent's Q-values.

  Args:
    num_actions: int, number of actions.
    network_type: namedtuple, collection of expected values to return.
    state: `tf.Tensor`, contains the agent's current state.

  Returns:
    net: _network_type object containing the tensors output by the network.
  """
  # Rescale byte-valued pixels into [0, 1].
  net = tf.cast(state, tf.float32)
  net = tf.div(net, 255.)
  # Nature-DQN convolutional torso: (filters, kernel, stride) per stage.
  for depth, kernel, stride in ((32, [8, 8], 4),
                                (64, [4, 4], 2),
                                (64, [3, 3], 1)):
    net = layers.conv2d(net, depth, kernel, stride=stride)
  net = layers.flatten(net)
  net = layers.fully_connected(net, 512)
  # Linear head: one Q-value per action.
  q_values = layers.fully_connected(net, num_actions, activation_fn=None)
  return network_type(q_values)
def __init__(self, in_channels=1, out_channels=256, kernel_size=9, strides=1,
             padding='valid'):
    """Builds the convolutional front-end layer of this module.

    Args:
      in_channels: int, number of input channels (default 1).
      out_channels: int, number of convolution filters (default 256).
      kernel_size: int, spatial size of the square kernel (default 9).
      strides: int, convolution stride (default 1).
      padding: str, padding mode passed through to the conv layer.
    """
    super(Convolution, self).__init__()
    # NOTE(review): `layers.conv2d` is called with `inputs=in_channels`
    # (an int). In tf.contrib.layers, `conv2d` applies a convolution to an
    # input *tensor* immediately — it is not a layer constructor — so this
    # call looks like it expects a Keras/PyTorch-style factory instead.
    # Verify which `layers` module is imported here.
    self.conv = layers.conv2d(inputs=in_channels, filters=out_channels,
                              kernel_size=kernel_size, strides=strides,
                              padding=padding)
    # NOTE(review): `nn.relu` is typically a function (e.g. tf.nn.relu),
    # not a class; calling it with no arguments may raise. Confirm against
    # the `nn` import.
    self.relu = nn.relu()
def implicit_quantile_network(num_actions, quantile_embedding_dim,
                              network_type, state, num_quantiles):
  """The Implicit Quantile ConvNet.

  Args:
    num_actions: int, number of actions.
    quantile_embedding_dim: int, embedding dimension for the quantile input.
    network_type: namedtuple, collection of expected values to return.
    state: `tf.Tensor`, contains the agent's current state.
    num_quantiles: int, number of quantile inputs.

  Returns:
    net: _network_type object containing the tensors output by the network.
  """
  # Scaled uniform initializer used for every parameterized layer.
  initializer = layers.variance_scaling_initializer(
      factor=1.0 / np.sqrt(3.0), mode='FAN_IN', uniform=True)

  # State torso: rescale pixels into [0, 1], then the Nature-DQN conv stack.
  state_net = tf.cast(state, tf.float32)
  state_net = tf.div(state_net, 255.)
  for depth, kernel, stride in ((32, [8, 8], 4),
                                (64, [4, 4], 2),
                                (64, [3, 3], 1)):
    state_net = layers.conv2d(state_net, depth, kernel, stride=stride,
                              weights_initializer=initializer)
  state_net = layers.flatten(state_net)
  state_net_size = state_net.get_shape().as_list()[-1]
  # Repeat the state embedding once per quantile sample so each sample is
  # paired with a copy of the state.
  state_net_tiled = tf.tile(state_net, [num_quantiles, 1])

  # Draw one uniform quantile scalar per (quantile, batch-element) pair.
  # NOTE: relies on a statically known batch dimension.
  batch_size = state_net.get_shape().as_list()[0]
  quantiles = tf.random_uniform([num_quantiles * batch_size, 1],
                                minval=0, maxval=1, dtype=tf.float32)
  # Cosine embedding of the quantiles: cos(i * pi * tau) for i = 1..dim.
  quantile_net = tf.tile(quantiles, [1, quantile_embedding_dim])
  pi = tf.constant(math.pi)
  quantile_net = tf.cast(tf.range(1, quantile_embedding_dim + 1, 1),
                         tf.float32) * pi * quantile_net
  quantile_net = tf.cos(quantile_net)
  quantile_net = layers.fully_connected(quantile_net, state_net_size,
                                        weights_initializer=initializer)

  # Hadamard product fuses the state and quantile embeddings.
  net = tf.multiply(state_net_tiled, quantile_net)
  net = layers.fully_connected(net, 512, weights_initializer=initializer)
  quantile_values = layers.fully_connected(net, num_actions,
                                           activation_fn=None,
                                           weights_initializer=initializer)
  return network_type(quantile_values=quantile_values, quantiles=quantiles)
def _build_model(self, inputs):
    """Builds the 12-layer residual CNN and returns its 2-way logits.

    Args:
      inputs: `tf.Tensor`, the input batch; arrives channels-last (NHWC)
        and is transposed if `self.data_format` is 'NCHW'.

    Returns:
      `tf.Tensor` of shape [batch, 2], the unnormalized class logits
      (also stored in `self.outputs`).
    """
    self.inputs = inputs
    # Choose the spatial axes for the final global average pool to match
    # the layout, and cast (and if needed transpose) the input to float32.
    if self.data_format == 'NCHW':
        reduction_axis = [2,3]
        _inputs = tf.cast(tf.transpose(inputs, [0, 3, 1, 2]), tf.float32)
    else:
        reduction_axis = [1,2]
        _inputs = tf.cast(inputs, tf.float32)
    # Shared defaults for every layer below: 3x3 stride-1 convs with 16
    # filters, L2 weight decay 2e-4, biases initialized to 0.2, no built-in
    # activation; fused batch norm tied to the training flag; 3x3 stride-2
    # average pooling used by the downsampling blocks.
    with arg_scope([layers.conv2d], num_outputs=16, kernel_size=3, stride=1,
                   padding='SAME', data_format=self.data_format,
                   activation_fn=None,
                   weights_initializer=layers.variance_scaling_initializer(),
                   weights_regularizer=layers.l2_regularizer(2e-4),
                   biases_initializer=tf.constant_initializer(0.2),
                   biases_regularizer=None),\
        arg_scope([layers.batch_norm], decay=0.9, center=True, scale=True,
                  updates_collections=None, is_training=self.is_training,
                  fused=True, data_format=self.data_format),\
        arg_scope([layers.avg_pool2d], kernel_size=[3,3], stride=[2,2],
                  padding='SAME', data_format=self.data_format):
        # Stem: conv-BN-ReLU with 64 filters, then back down to the
        # arg_scope default of 16 filters.
        with tf.variable_scope('Layer1'):
            conv=layers.conv2d(_inputs, num_outputs=64, kernel_size=3)
            actv=tf.nn.relu(layers.batch_norm(conv))
        with tf.variable_scope('Layer2'):
            conv=layers.conv2d(actv)
            actv=tf.nn.relu(layers.batch_norm(conv))
        # Layers 3-7: identity residual blocks at 16 filters:
        # conv-BN-ReLU-conv-BN added to the block input.
        with tf.variable_scope('Layer3'):
            conv1=layers.conv2d(actv)
            actv1=tf.nn.relu(layers.batch_norm(conv1))
            conv2=layers.conv2d(actv1)
            bn2=layers.batch_norm(conv2)
            res= tf.add(actv, bn2)
        with tf.variable_scope('Layer4'):
            conv1=layers.conv2d(res)
            actv1=tf.nn.relu(layers.batch_norm(conv1))
            conv2=layers.conv2d(actv1)
            bn2=layers.batch_norm(conv2)
            res= tf.add(res, bn2)
        with tf.variable_scope('Layer5'):
            conv1=layers.conv2d(res)
            actv1=tf.nn.relu(layers.batch_norm(conv1))
            conv2=layers.conv2d(actv1)
            bn=layers.batch_norm(conv2)
            res= tf.add(res, bn)
        with tf.variable_scope('Layer6'):
            conv1=layers.conv2d(res)
            actv1=tf.nn.relu(layers.batch_norm(conv1))
            conv2=layers.conv2d(actv1)
            bn=layers.batch_norm(conv2)
            res= tf.add(res, bn)
        with tf.variable_scope('Layer7'):
            conv1=layers.conv2d(res)
            actv1=tf.nn.relu(layers.batch_norm(conv1))
            conv2=layers.conv2d(actv1)
            bn=layers.batch_norm(conv2)
            res= tf.add(res, bn)
        # Layers 8-11: downsampling residual blocks. The shortcut is a
        # 1x1 stride-2 conv + BN projection; the main branch is
        # conv-BN-ReLU-conv-BN followed by stride-2 average pooling, and
        # the two halved-resolution tensors are summed. Filter counts
        # grow 16 -> 64 -> 128 -> 256.
        with tf.variable_scope('Layer8'):
            convs = layers.conv2d(res, kernel_size=1, stride=2)
            convs = layers.batch_norm(convs)
            conv1=layers.conv2d(res)
            actv1=tf.nn.relu(layers.batch_norm(conv1))
            conv2=layers.conv2d(actv1)
            bn=layers.batch_norm(conv2)
            pool = layers.avg_pool2d(bn)
            res= tf.add(convs, pool)
        with tf.variable_scope('Layer9'):
            convs = layers.conv2d(res, num_outputs=64, kernel_size=1, stride=2)
            convs = layers.batch_norm(convs)
            conv1=layers.conv2d(res, num_outputs=64)
            actv1=tf.nn.relu(layers.batch_norm(conv1))
            conv2=layers.conv2d(actv1, num_outputs=64)
            bn=layers.batch_norm(conv2)
            pool = layers.avg_pool2d(bn)
            res= tf.add(convs, pool)
        with tf.variable_scope('Layer10'):
            convs = layers.conv2d(res, num_outputs=128, kernel_size=1, stride=2)
            convs = layers.batch_norm(convs)
            conv1=layers.conv2d(res, num_outputs=128)
            actv1=tf.nn.relu(layers.batch_norm(conv1))
            conv2=layers.conv2d(actv1, num_outputs=128)
            bn=layers.batch_norm(conv2)
            pool = layers.avg_pool2d(bn)
            res= tf.add(convs, pool)
        with tf.variable_scope('Layer11'):
            convs = layers.conv2d(res, num_outputs=256, kernel_size=1, stride=2)
            convs = layers.batch_norm(convs)
            conv1=layers.conv2d(res, num_outputs=256)
            actv1=tf.nn.relu(layers.batch_norm(conv1))
            conv2=layers.conv2d(actv1, num_outputs=256)
            bn=layers.batch_norm(conv2)
            pool = layers.avg_pool2d(bn)
            res= tf.add(convs, pool)
        # Head: conv-BN-ReLU-conv-BN at 512 filters, global average pool
        # over the spatial axes, then a linear layer to 2 logits.
        with tf.variable_scope('Layer12'):
            conv1=layers.conv2d(res, num_outputs=512)
            actv1=tf.nn.relu(layers.batch_norm(conv1))
            conv2=layers.conv2d(actv1, num_outputs=512)
            bn=layers.batch_norm(conv2)
            avgp = tf.reduce_mean(bn, reduction_axis, keepdims=True )
        ip=layers.fully_connected(layers.flatten(avgp), num_outputs=2,
                                  activation_fn=None, normalizer_fn=None,
                                  weights_initializer=tf.random_normal_initializer(mean=0., stddev=0.01),
                                  biases_initializer=tf.constant_initializer(0.),
                                  scope='ip')
    self.outputs = ip
    return self.outputs