def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope, reuse=False, layer_norm=False):
    """Build a convolutional Q-network with an optional dueling head.

    Args:
        convs: iterable of ``(num_outputs, kernel_size, stride)`` triples, one
            per convolutional layer, applied in order.
        hiddens: iterable of hidden-layer widths. Used by the action-value
            stream and, when ``dueling`` is set, by the state-value stream.
        dueling: if true, add a state-value stream and combine it with the
            mean-centred action scores.
        inpt: input tensor fed to the first convolution.
        num_actions: number of outputs of the action-value stream.
        scope: variable scope under which all layers are created.
        reuse: forwarded to ``tf.variable_scope``.
        layer_norm: if true, apply layer normalisation before each hidden
            ReLU.

    Returns:
        The Q-value tensor: the action scores alone, or state score plus
        centred action scores when ``dueling`` is set.
    """

    def hidden_stack(features):
        # Linear layer -> optional layer norm -> ReLU, once per hidden width.
        for hidden in hiddens:
            features = layers.fully_connected(features, num_outputs=hidden, activation_fn=None)
            if layer_norm:
                features = layers.layer_norm(features, center=True, scale=True)
            features = tf.nn.relu(features)
        return features

    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        with tf.variable_scope("convnet"):
            for num_outputs, kernel_size, stride in convs:
                # Functional conv2d takes the input tensor first, matching the
                # keyword names (num_outputs / stride / activation_fn) used here.
                out = layers.conv2d(out,
                                    num_outputs=num_outputs,
                                    kernel_size=kernel_size,
                                    stride=stride,
                                    activation_fn=tf.nn.relu)
        conv_out = layers.flatten(out)

        with tf.variable_scope("action_value"):
            action_out = hidden_stack(conv_out)
            action_scores = layers.fully_connected(action_out, num_outputs=num_actions, activation_fn=None)

        if dueling:
            with tf.variable_scope("state_value"):
                state_out = hidden_stack(conv_out)
                state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
            # Subtract the per-row mean so the action stream only carries
            # the relative preference between actions.
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
            q_out = state_score + action_scores_centered
        else:
            q_out = action_scores
        return q_out
def _mlp(hiddens, inpt, num_actions, scope, reuse=False, layer_norm=False):
    """Build a fully connected Q-network.

    Each entry of ``hiddens`` adds a linear layer, an optional layer
    normalisation (when ``layer_norm`` is true) and a ReLU. A final linear
    layer with ``num_actions`` outputs produces the result. All layers are
    created under ``tf.variable_scope(scope, reuse=reuse)``.

    Returns:
        The output tensor of the final linear layer.
    """
    with tf.variable_scope(scope, reuse=reuse):
        features = inpt
        for width in hiddens:
            pre_activation = layers.fully_connected(
                features, num_outputs=width, activation_fn=None)
            if layer_norm:
                pre_activation = layers.layer_norm(
                    pre_activation, center=True, scale=True)
            features = tf.nn.relu(pre_activation)
        return layers.fully_connected(
            features, num_outputs=num_actions, activation_fn=None)
def fprop(self, img_in, **kwargs):
    """Build the Q-network graph and return its logits and probabilities.

    As described in
    https://storage.googleapis.com/deepmind-data/assets/papers/DeepMindNature14236Paper.pdf

    Args:
        img_in: input tensor fed to the first convolution.
        **kwargs: accepted for interface compatibility and ignored.

    Returns:
        A dict mapping ``self.O_LOGITS`` to the per-action output tensor and
        ``self.O_PROBS`` to the softmax of that tensor.
    """
    del kwargs
    with tf.variable_scope(self.scope, reuse=self.reuse):
        out = img_in
        with tf.variable_scope("convnet"):
            # original architecture
            # Each convolution is applied to the running tensor `out`, so the
            # input image actually flows through the stack.
            out = layers.conv2d(out, num_outputs=32, kernel_size=8, stride=4, activation_fn=tf.nn.relu)
            out = layers.conv2d(out, num_outputs=64, kernel_size=4, stride=2, activation_fn=tf.nn.relu)
            out = layers.conv2d(out, num_outputs=64, kernel_size=3, stride=1, activation_fn=tf.nn.relu)
        out = layers.flatten(out)

        with tf.variable_scope("action_value"):
            if self.noisy:
                # Apply noisy network on fully connected layers
                # ref: https://arxiv.org/abs/1706.10295
                out = noisy_dense(out, name='noisy_fc1', size=512, activation=tf.nn.relu)
                out = noisy_dense(out, name='noisy_fc2', size=self.num_actions)
            else:
                # fully_connected takes `activation_fn`; passing None keeps
                # the output layer linear.
                out = layers.fully_connected(out, num_outputs=512, activation_fn=tf.nn.relu)
                out = layers.fully_connected(out, num_outputs=self.num_actions, activation_fn=None)

        # Softmax output inspired by deep-rl-attack.
        return {
            self.O_LOGITS: out,
            self.O_PROBS: tf.nn.softmax(logits=out)
        }
def rainbow_network(num_actions, num_atoms, support, network_type, state):
    """Build the convolutional network that outputs Q-value distributions.

    Args:
      num_actions: int, number of actions.
      num_atoms: int, number of buckets in the value distribution.
      support: tensor multiplied element-wise with the probabilities to
        obtain the expected Q-values.
      network_type: namedtuple constructor called with
        ``(q_values, logits, probabilities)``.
      state: `tf.Tensor`, the agent's current state.

    Returns:
      The ``network_type`` instance built from the network's outputs.
    """
    init = layers.variance_scaling_initializer(
        factor=1.0 / np.sqrt(3.0), mode='FAN_IN', uniform=True)

    # Cast the state to float and divide by 255 before the conv stack.
    hidden = tf.div(tf.cast(state, tf.float32), 255.)

    conv_specs = ((32, [8, 8], 4), (64, [4, 4], 2), (64, [3, 3], 1))
    for filters, kernel, stride in conv_specs:
        hidden = layers.conv2d(
            hidden, filters, kernel, stride=stride, weights_initializer=init)

    hidden = layers.flatten(hidden)
    hidden = layers.fully_connected(hidden, 512, weights_initializer=init)
    flat_logits = layers.fully_connected(
        hidden,
        num_actions * num_atoms,
        activation_fn=None,
        weights_initializer=init)

    # One row of `num_atoms` logits per action.
    logits = tf.reshape(flat_logits, [-1, num_actions, num_atoms])
    probabilities = layers.softmax(logits)
    q_values = tf.reduce_sum(support * probabilities, axis=2)
    return network_type(q_values, logits, probabilities)
def nature_dqn_network(num_actions, network_type, state):
    """Build the convolutional network that outputs the agent's Q-values.

    Args:
      num_actions: int, number of actions.
      network_type: namedtuple constructor called with the Q-value tensor.
      state: `tf.Tensor`, the agent's current state.

    Returns:
      The ``network_type`` instance wrapping the Q-values.
    """
    # Cast the state to float and divide by 255 before the conv stack.
    hidden = tf.div(tf.cast(state, tf.float32), 255.)

    conv_specs = ((32, [8, 8], 4), (64, [4, 4], 2), (64, [3, 3], 1))
    for filters, kernel, stride in conv_specs:
        hidden = layers.conv2d(hidden, filters, kernel, stride=stride)

    hidden = layers.fully_connected(layers.flatten(hidden), 512)
    return network_type(
        layers.fully_connected(hidden, num_actions, activation_fn=None))
def implicit_quantile_network(num_actions, quantile_embedding_dim, network_type, state, num_quantiles):
    """The Implicit Quantile ConvNet.

    Args:
      num_actions: int, number of actions.
      quantile_embedding_dim: int, embedding dimension for the quantile input.
      network_type: namedtuple, collection of expected values to return.
      state: `tf.Tensor`, contains the agent's current state.
      num_quantiles: int, number of quantile inputs.

    Returns:
      net: _network_type object containing the tensors output by the network.
    """
    weights_initializer = layers.variance_scaling_initializer(
        factor=1.0 / np.sqrt(3.0), mode='FAN_IN', uniform=True)

    state_net = tf.cast(state, tf.float32)
    state_net = tf.div(state_net, 255.)
    state_net = layers.conv2d(
        state_net, 32, [8, 8], stride=4,
        weights_initializer=weights_initializer)
    state_net = layers.conv2d(
        state_net, 64, [4, 4], stride=2,
        weights_initializer=weights_initializer)
    state_net = layers.conv2d(
        state_net, 64, [3, 3], stride=1,
        weights_initializer=weights_initializer)
    state_net = layers.flatten(state_net)
    state_net_size = state_net.get_shape().as_list()[-1]
    # Repeat the state features once per sampled quantile.
    state_net_tiled = tf.tile(state_net, [num_quantiles, 1])

    batch_size = state_net.get_shape().as_list()[0]
    if batch_size is None:
        # The batch dimension is not known statically (e.g. a placeholder
        # with a None batch size); multiplying None would raise, so read the
        # size from the tensor at run time instead.
        batch_size = tf.shape(state_net)[0]
    quantiles_shape = [num_quantiles * batch_size, 1]
    quantiles = tf.random_uniform(
        quantiles_shape, minval=0, maxval=1, dtype=tf.float32)

    # Cosine embedding: cos(pi * i * tau) for i = 1..quantile_embedding_dim.
    quantile_net = tf.tile(quantiles, [1, quantile_embedding_dim])
    pi = tf.constant(math.pi)
    quantile_net = tf.cast(tf.range(
        1, quantile_embedding_dim + 1, 1), tf.float32) * pi * quantile_net
    quantile_net = tf.cos(quantile_net)
    quantile_net = layers.fully_connected(
        quantile_net, state_net_size,
        weights_initializer=weights_initializer)
    # Hadamard product.
    net = tf.multiply(state_net_tiled, quantile_net)

    net = layers.fully_connected(
        net, 512, weights_initializer=weights_initializer)
    quantile_values = layers.fully_connected(
        net,
        num_actions,
        activation_fn=None,
        weights_initializer=weights_initializer)

    return network_type(quantile_values=quantile_values, quantiles=quantiles)
def _build_model(self, inputs):
    """Build the 12-layer convolutional network and return its output.

    The input is stored on ``self.inputs``. It is cast to float32, and
    transposed with ``[0, 3, 1, 2]`` first when ``self.data_format`` is
    ``'NCHW'``. The result of the final two-unit linear layer is stored on
    ``self.outputs`` and returned.

    Structure:
      * Layer1-2: conv -> batch norm -> ReLU.
      * Layer3-7: residual blocks (two conv/batch-norm stages added to the
        block input).
      * Layer8-11: blocks with a strided 1x1 conv shortcut added to an
        average-pooled two-conv branch.
      * Layer12: two conv/batch-norm stages followed by a mean over the
        spatial axes.
    """
    self.inputs = inputs
    if self.data_format == 'NCHW':
        reduction_axis = [2, 3]
        net_input = tf.cast(tf.transpose(inputs, [0, 3, 1, 2]), tf.float32)
    else:
        reduction_axis = [1, 2]
        net_input = tf.cast(inputs, tf.float32)

    def conv_bn(tensor, **conv_kwargs):
        # Convolution followed by batch normalisation (no activation).
        return layers.batch_norm(layers.conv2d(tensor, **conv_kwargs))

    def conv_bn_relu(tensor, **conv_kwargs):
        # Convolution, batch normalisation, then ReLU.
        return tf.nn.relu(conv_bn(tensor, **conv_kwargs))

    conv_defaults = dict(
        num_outputs=16,
        kernel_size=3,
        stride=1,
        padding='SAME',
        data_format=self.data_format,
        activation_fn=None,
        weights_initializer=layers.variance_scaling_initializer(),
        weights_regularizer=layers.l2_regularizer(2e-4),
        biases_initializer=tf.constant_initializer(0.2),
        biases_regularizer=None)
    bn_defaults = dict(
        decay=0.9,
        center=True,
        scale=True,
        updates_collections=None,
        is_training=self.is_training,
        fused=True,
        data_format=self.data_format)
    pool_defaults = dict(
        kernel_size=[3, 3],
        stride=[2, 2],
        padding='SAME',
        data_format=self.data_format)

    # Per-scope width overrides for the pooled blocks; Layer8 relies on the
    # conv2d default width from the arg scope.
    pooled_blocks = (
        ('Layer8', {}),
        ('Layer9', {'num_outputs': 64}),
        ('Layer10', {'num_outputs': 128}),
        ('Layer11', {'num_outputs': 256}),
    )

    with arg_scope([layers.conv2d], **conv_defaults), \
            arg_scope([layers.batch_norm], **bn_defaults), \
            arg_scope([layers.avg_pool2d], **pool_defaults):
        with tf.variable_scope('Layer1'):
            net = conv_bn_relu(net_input, num_outputs=64, kernel_size=3)
        with tf.variable_scope('Layer2'):
            net = conv_bn_relu(net)

        for scope_name in ('Layer3', 'Layer4', 'Layer5', 'Layer6', 'Layer7'):
            with tf.variable_scope(scope_name):
                branch = conv_bn(conv_bn_relu(net))
                net = tf.add(net, branch)

        for scope_name, width in pooled_blocks:
            with tf.variable_scope(scope_name):
                # The shortcut is created first so layer naming inside the
                # scope follows the same creation order as before.
                shortcut = conv_bn(net, kernel_size=1, stride=2, **width)
                branch = conv_bn(conv_bn_relu(net, **width), **width)
                pooled = layers.avg_pool2d(branch)
                net = tf.add(shortcut, pooled)

        with tf.variable_scope('Layer12'):
            branch = conv_bn(conv_bn_relu(net, num_outputs=512), num_outputs=512)
            avgp = tf.reduce_mean(branch, reduction_axis, keepdims=True)

    ip = layers.fully_connected(
        layers.flatten(avgp),
        num_outputs=2,
        activation_fn=None,
        normalizer_fn=None,
        weights_initializer=tf.random_normal_initializer(mean=0., stddev=0.01),
        biases_initializer=tf.constant_initializer(0.),
        scope='ip')
    self.outputs = ip
    return self.outputs
def dueling_model(img_in, num_actions, scope, noisy=False, reuse=False, concat_softmax=False):
    """Build a dueling Q-network, as described in https://arxiv.org/abs/1511.06581

    Args:
        img_in: input tensor fed to the first convolution.
        num_actions: number of outputs of the action-value stream.
        scope: variable scope under which all layers are created.
        noisy: if true, use ``noisy_dense`` for the fully connected layers
            (ref: https://arxiv.org/abs/1706.10295).
        reuse: forwarded to ``tf.variable_scope``.
        concat_softmax: accepted for interface compatibility; not used by
            this function.

    Returns:
        The state score plus the mean-centred action scores.
    """
    with tf.variable_scope(scope, reuse=reuse):
        out = img_in
        with tf.variable_scope("convnet"):
            # original architecture
            # Each convolution is applied to the running tensor `out`, so the
            # input image actually flows through the stack.
            out = layers.conv2d(out, num_outputs=32, kernel_size=8, stride=4, activation_fn=tf.nn.relu)
            out = layers.conv2d(out, num_outputs=64, kernel_size=4, stride=2, activation_fn=tf.nn.relu)
            out = layers.conv2d(out, num_outputs=64, kernel_size=3, stride=1, activation_fn=tf.nn.relu)
        out = layers.flatten(out)

        with tf.variable_scope("state_value"):
            if noisy:
                # Apply noisy network on fully connected layers
                # ref: https://arxiv.org/abs/1706.10295
                state_hidden = noisy_dense(out, name='noisy_fc1', size=512, activation=tf.nn.relu)
                state_score = noisy_dense(state_hidden, name='noisy_fc2', size=1)
            else:
                # fully_connected takes `activation_fn`; passing None keeps
                # the output layer linear.
                state_hidden = layers.fully_connected(out, num_outputs=512, activation_fn=tf.nn.relu)
                state_score = layers.fully_connected(state_hidden, num_outputs=1, activation_fn=None)

        with tf.variable_scope("action_value"):
            if noisy:
                # Apply noisy network on fully connected layers
                # ref: https://arxiv.org/abs/1706.10295
                actions_hidden = noisy_dense(out, name='noisy_fc1', size=512, activation=tf.nn.relu)
                action_scores = noisy_dense(actions_hidden, name='noisy_fc2', size=num_actions)
            else:
                actions_hidden = layers.fully_connected(out, num_outputs=512, activation_fn=tf.nn.relu)
                action_scores = layers.fully_connected(actions_hidden, num_outputs=num_actions, activation_fn=None)
            # Subtract the per-row mean so the action stream only carries
            # the relative preference between actions.
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores = action_scores - tf.expand_dims(action_scores_mean, 1)

        return state_score + action_scores
def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, layers=None, cnn_extractor=nature_cnn, feature_extraction="cnn", obs_phs=None, layer_norm=False, dueling=True, act_fun=tf.nn.relu, **kwargs):
    """Build the Q-value graph for a feed-forward policy.

    Args:
        sess, ob_space, ac_space, n_env, n_steps, n_batch, obs_phs:
            forwarded unchanged to the parent constructor.
        reuse: forwarded to the parent constructor and to the ``"model"``
            variable scope.
        layers: hidden-layer widths; ``[64, 64]`` is used when ``None``.
        cnn_extractor: callable applied to ``self.processed_obs`` (with
            ``**kwargs``) when ``feature_extraction == "cnn"``.
        feature_extraction: ``"cnn"`` selects ``cnn_extractor``; any other
            value flattens the observation instead. Also determines the
            ``scale`` flag passed to the parent constructor.
        layer_norm: if true, apply layer normalisation before each hidden
            activation.
        dueling: forwarded to the parent constructor; the graph below
            branches on ``self.dueling``.
        act_fun: activation applied after each hidden layer.
        **kwargs: checked by ``self._kwargs_check`` and forwarded to
            ``cnn_extractor``.

    The resulting tensor is stored on ``self.q_values``.
    """
    super(FeedForwardPolicy, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch, dueling=dueling, reuse=reuse, scale=(feature_extraction == "cnn"), obs_phs=obs_phs)
    self._kwargs_check(feature_extraction, kwargs)
    if layers is None:
        layers = [64, 64]
    with tf.variable_scope("model", reuse=reuse):
        with tf.variable_scope("action_value"):
            if feature_extraction == "cnn":
                extracted_features = cnn_extractor(self.processed_obs, **kwargs)
                action_out = extracted_features
            else:
                extracted_features = tf.layers.flatten(self.processed_obs)
                action_out = extracted_features
                # NOTE(review): the hidden layers below are applied only on
                # the non-CNN path -- confirm this nesting is intended.
                for layer_size in layers:
                    action_out = tf_layers.fully_connected( action_out, num_outputs=layer_size, activation_fn=None)
                    if layer_norm:
                        action_out = tf_layers.layer_norm(action_out, center=True, scale=True)
                    action_out = act_fun(action_out)
            # Linear output: one score per action.
            action_scores = tf_layers.fully_connected( action_out, num_outputs=self.n_actions, activation_fn=None)
        if self.dueling:
            with tf.variable_scope("state_value"):
                # The state-value stream starts from the same extracted
                # features as the action stream.
                state_out = extracted_features
                for layer_size in layers:
                    state_out = tf_layers.fully_connected( state_out, num_outputs=layer_size, activation_fn=None)
                    if layer_norm:
                        state_out = tf_layers.layer_norm(state_out, center=True, scale=True)
                    state_out = act_fun(state_out)
                state_score = tf_layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
            # Centre the action scores on their per-row mean before adding
            # the state score.
            action_scores_mean = tf.reduce_mean(action_scores, axis=1)
            action_scores_centered = action_scores - tf.expand_dims( action_scores_mean, axis=1)
            q_out = state_score + action_scores_centered
        else:
            q_out = action_scores
    self.q_values = q_out
    self._setup_init()