Code example #1
def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope, reuse=False, layer_norm=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        with tf.variable_scope("convnet"):
            for num_outputs, kernel_size, stride in convs:
                out = layers.convolution2d(out,
                                           num_outputs=num_outputs,
                                           kernel_size=kernel_size,
                                           stride=stride,
                                           activation_fn=tf.nn.relu)
        conv_out = layers.flatten(out)
        with tf.variable_scope("action_value"):
            action_out = conv_out
            for hidden in hiddens:
                action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=None)
                if layer_norm:
                    action_out = layers.layer_norm(action_out, center=True, scale=True)
                action_out = tf.nn.relu(action_out)
            action_scores = layers.fully_connected(action_out, num_outputs=num_actions, activation_fn=None)

        if dueling:
            with tf.variable_scope("state_value"):
                state_out = conv_out
                for hidden in hiddens:
                    state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=None)
                    if layer_norm:
                        state_out = layers.layer_norm(state_out, center=True, scale=True)
                    state_out = tf.nn.relu(state_out)
                state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
            q_out = state_score + action_scores_centered
        else:
            q_out = action_scores
        return q_out
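The dueling head above combines a scalar state value with mean-centered action advantages, Q(s, a) = V(s) + A(s, a) - mean_a' A(s, a'). A minimal NumPy sketch of just that aggregation, with made-up numbers:

import numpy as np

# Hypothetical dueling-head outputs for a batch of 2 states and 3 actions.
state_score = np.array([[1.0], [2.0]])        # V(s), shape (2, 1)
action_scores = np.array([[0.5, 1.5, 1.0],
                          [2.0, 0.0, 1.0]])   # A(s, a), shape (2, 3)

# Same centering as in the dueling branch above.
action_scores_centered = action_scores - action_scores.mean(axis=1, keepdims=True)
q_out = state_score + action_scores_centered  # Q(s, a)
print(q_out)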
Code example #2
def _mlp(hiddens, inpt, num_actions, scope, reuse=False, layer_norm=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        for hidden in hiddens:
            out = layers.fully_connected(out, num_outputs=hidden, activation_fn=None)
            if layer_norm:
                out = layers.layer_norm(out, center=True, scale=True)
            out = tf.nn.relu(out)
        q_out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)
        return q_out
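A minimal usage sketch for _mlp, assuming TF 1.x with `layers` bound to tf.contrib.layers; the placeholder shape, hidden sizes, and scope name are illustrative only:

import tensorflow as tf

obs_ph = tf.placeholder(tf.float32, [None, 4], name="observation")
q_values = _mlp(hiddens=[64, 64], inpt=obs_ph, num_actions=2,
                scope="q_func", layer_norm=True)
# q_values has shape [None, 2]: one Q estimate per action.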
Code example #3
    def fprop(self, img_in, **kwargs):
        del kwargs
        #def model(img_in, num_actions, scope, noisy=False, reuse=False, concat_softmax=False):
        """As described in https://storage.googleapis.com/deepmind-data/assets/papers/DeepMindNature14236Paper.pdf"""
        with tf.variable_scope(self.scope, reuse=self.reuse):
            out = img_in
            with tf.variable_scope("convnet"):
                # original architecture (assumes `layers` is tf.contrib.layers, TF 1.x)
                out = layers.convolution2d(out,
                                           num_outputs=32,
                                           kernel_size=8,
                                           stride=4,
                                           activation_fn=tf.nn.relu)
                out = layers.convolution2d(out,
                                           num_outputs=64,
                                           kernel_size=4,
                                           stride=2,
                                           activation_fn=tf.nn.relu)
                out = layers.convolution2d(out,
                                           num_outputs=64,
                                           kernel_size=3,
                                           stride=1,
                                           activation_fn=tf.nn.relu)
            out = layers.flatten(out)

            with tf.variable_scope("action_value"):
                if self.noisy:
                    # Apply noisy network on fully connected layers
                    # ref: https://arxiv.org/abs/1706.10295
                    out = noisy_dense(out,
                                      name='noisy_fc1',
                                      size=512,
                                      activation=tf.nn.relu)
                    out = noisy_dense(out,
                                      name='noisy_fc2',
                                      size=self.num_actions)
                else:
                    out = layers.fully_connected(out,
                                                 num_outputs=512,
                                                 activation_fn=tf.nn.relu)
                    out = layers.fully_connected(out,
                                                 num_outputs=self.num_actions,
                                                 activation_fn=None)
                #V: Softmax - inspired by deep-rl-attack #
                #if concat_softmax:
                #prob = tf.nn.softmax(out)
            #return out
            return {
                self.O_LOGITS: out,
                self.O_PROBS: tf.nn.softmax(logits=out)
            }
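The noisy_dense helper is not shown in this snippet; it refers to the NoisyNet layer of https://arxiv.org/abs/1706.10295, in which weights and biases receive learned, per-parameter Gaussian perturbations. A rough NumPy sketch of the forward pass of such a layer (independent noise, made-up shapes), not the repository's actual implementation:

import numpy as np

def noisy_dense_forward(x, w_mu, w_sigma, b_mu, b_sigma, rng):
    # y = x (w_mu + w_sigma * eps_w) + (b_mu + b_sigma * eps_b)
    eps_w = rng.standard_normal(w_mu.shape)
    eps_b = rng.standard_normal(b_mu.shape)
    return x @ (w_mu + w_sigma * eps_w) + (b_mu + b_sigma * eps_b)

rng = np.random.default_rng(0)
x = rng.standard_normal((2, 8))          # batch of 2, 8 features
w_mu = rng.standard_normal((8, 4))
w_sigma = 0.1 * np.ones((8, 4))
b_mu, b_sigma = np.zeros(4), 0.1 * np.ones(4)
print(noisy_dense_forward(x, w_mu, w_sigma, b_mu, b_sigma, rng).shape)  # (2, 4)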
Code example #4
File: atari_lib.py  Project: ustyuzhaninky/dopamine
def rainbow_network(num_actions, num_atoms, support, network_type, state):
    """The convolutional network used to compute agent's Q-value distributions.

  Args:
    num_actions: int, number of actions.
    num_atoms: int, the number of buckets of the value function distribution.
    support: tf.linspace, the support of the Q-value distribution.
    network_type: namedtuple, collection of expected values to return.
    state: `tf.Tensor`, contains the agent's current state.

  Returns:
    net: _network_type object containing the tensors output by the network.
  """
    weights_initializer = layers.variance_scaling_initializer(factor=1.0 /
                                                              np.sqrt(3.0),
                                                              mode='FAN_IN',
                                                              uniform=True)

    net = tf.cast(state, tf.float32)
    net = tf.div(net, 255.)
    net = layers.conv2d(net,
                        32, [8, 8],
                        stride=4,
                        weights_initializer=weights_initializer)
    net = layers.conv2d(net,
                        64, [4, 4],
                        stride=2,
                        weights_initializer=weights_initializer)
    net = layers.conv2d(net,
                        64, [3, 3],
                        stride=1,
                        weights_initializer=weights_initializer)
    net = layers.flatten(net)
    net = layers.fully_connected(net,
                                 512,
                                 weights_initializer=weights_initializer)
    net = layers.fully_connected(net,
                                 num_actions * num_atoms,
                                 activation_fn=None,
                                 weights_initializer=weights_initializer)

    logits = tf.reshape(net, [-1, num_actions, num_atoms])
    probabilities = layers.softmax(logits)
    q_values = tf.reduce_sum(support * probabilities, axis=2)
    return network_type(q_values, logits, probabilities)
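The last three lines convert the per-action return distributions into scalar Q-values by taking the expectation over the fixed support, as in C51/Rainbow (https://arxiv.org/abs/1707.06887). A NumPy sketch of that reduction with made-up logits:

import numpy as np

support = np.linspace(-10.0, 10.0, 51)            # num_atoms = 51
logits = np.random.randn(1, 4, 51)                # batch of 1, 4 actions
probabilities = np.exp(logits) / np.exp(logits).sum(axis=2, keepdims=True)  # softmax over atoms
q_values = (support * probabilities).sum(axis=2)  # expected return per action
print(q_values.shape)                             # (1, 4)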
Code example #5
File: atari_lib.py  Project: ustyuzhaninky/dopamine
def nature_dqn_network(num_actions, network_type, state):
    """The convolutional network used to compute the agent's Q-values.

  Args:
    num_actions: int, number of actions.
    network_type: namedtuple, collection of expected values to return.
    state: `tf.Tensor`, contains the agent's current state.

  Returns:
    net: _network_type object containing the tensors output by the network.
  """
    net = tf.cast(state, tf.float32)
    net = tf.div(net, 255.)
    net = layers.conv2d(net, 32, [8, 8], stride=4)
    net = layers.conv2d(net, 64, [4, 4], stride=2)
    net = layers.conv2d(net, 64, [3, 3], stride=1)
    net = layers.flatten(net)
    net = layers.fully_connected(net, 512)
    q_values = layers.fully_connected(net, num_actions, activation_fn=None)
    return network_type(q_values)
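network_type is a namedtuple supplied by the calling agent. A minimal wiring sketch, assuming TF 1.x; the namedtuple definition, action count, and 84x84x4 Atari state shape are illustrative assumptions, not taken from this file:

import collections
import tensorflow as tf

DQNNetworkType = collections.namedtuple('DQN_network', ['q_values'])
state_ph = tf.placeholder(tf.uint8, [None, 84, 84, 4], name='state')
net = nature_dqn_network(num_actions=6, network_type=DQNNetworkType, state=state_ph)
# net.q_values has shape [None, 6].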
Code example #6
File: atari_lib.py  Project: ustyuzhaninky/dopamine
def implicit_quantile_network(num_actions, quantile_embedding_dim,
                              network_type, state, num_quantiles):
    """The Implicit Quantile ConvNet.

  Args:
    num_actions: int, number of actions.
    quantile_embedding_dim: int, embedding dimension for the quantile input.
    network_type: namedtuple, collection of expected values to return.
    state: `tf.Tensor`, contains the agent's current state.
    num_quantiles: int, number of quantile inputs.

  Returns:
    net: _network_type object containing the tensors output by the network.
  """
    weights_initializer = layers.variance_scaling_initializer(factor=1.0 /
                                                              np.sqrt(3.0),
                                                              mode='FAN_IN',
                                                              uniform=True)

    state_net = tf.cast(state, tf.float32)
    state_net = tf.div(state_net, 255.)
    state_net = layers.conv2d(state_net,
                              32, [8, 8],
                              stride=4,
                              weights_initializer=weights_initializer)
    state_net = layers.conv2d(state_net,
                              64, [4, 4],
                              stride=2,
                              weights_initializer=weights_initializer)
    state_net = layers.conv2d(state_net,
                              64, [3, 3],
                              stride=1,
                              weights_initializer=weights_initializer)
    state_net = layers.flatten(state_net)
    state_net_size = state_net.get_shape().as_list()[-1]
    state_net_tiled = tf.tile(state_net, [num_quantiles, 1])

    batch_size = state_net.get_shape().as_list()[0]
    quantiles_shape = [num_quantiles * batch_size, 1]
    quantiles = tf.random_uniform(quantiles_shape,
                                  minval=0,
                                  maxval=1,
                                  dtype=tf.float32)

    quantile_net = tf.tile(quantiles, [1, quantile_embedding_dim])
    pi = tf.constant(math.pi)
    quantile_net = tf.cast(tf.range(1, quantile_embedding_dim + 1, 1),
                           tf.float32) * pi * quantile_net
    quantile_net = tf.cos(quantile_net)
    quantile_net = layers.fully_connected(
        quantile_net, state_net_size, weights_initializer=weights_initializer)
    # Hadamard product.
    net = tf.multiply(state_net_tiled, quantile_net)

    net = layers.fully_connected(net,
                                 512,
                                 weights_initializer=weights_initializer)
    quantile_values = layers.fully_connected(
        net,
        num_actions,
        activation_fn=None,
        weights_initializer=weights_initializer)

    return network_type(quantile_values=quantile_values, quantiles=quantiles)
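The quantile branch above implements the cosine embedding from the IQN paper (https://arxiv.org/abs/1806.06923): each sampled quantile tau is expanded into cos(pi * i * tau) for i = 1..quantile_embedding_dim, passed through a fully connected layer, and multiplied elementwise with the tiled state features. A NumPy sketch of just the cosine embedding:

import numpy as np

quantile_embedding_dim = 64
taus = np.random.uniform(size=(5, 1))          # 5 sampled quantiles in [0, 1)
i = np.arange(1, quantile_embedding_dim + 1)   # 1 .. embedding_dim
cos_embedding = np.cos(np.pi * i * taus)       # shape (5, 64), taus broadcast against i
print(cos_embedding.shape)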
Code example #7
File: models.py  Project: deepsteg/deepsteg
 def _build_model(self, inputs):
     self.inputs = inputs
     if self.data_format == 'NCHW':
         reduction_axis = [2,3]
         _inputs = tf.cast(tf.transpose(inputs, [0, 3, 1, 2]), tf.float32)
     else:
         reduction_axis = [1,2]
         _inputs = tf.cast(inputs, tf.float32)
     with arg_scope([layers.conv2d], num_outputs=16,
                    kernel_size=3, stride=1, padding='SAME',
                    data_format=self.data_format,
                    activation_fn=None,
                    weights_initializer=layers.variance_scaling_initializer(),
                    weights_regularizer=layers.l2_regularizer(2e-4),
                    biases_initializer=tf.constant_initializer(0.2),
                    biases_regularizer=None),\
         arg_scope([layers.batch_norm],
                    decay=0.9, center=True, scale=True, 
                    updates_collections=None, is_training=self.is_training,
                    fused=True, data_format=self.data_format),\
         arg_scope([layers.avg_pool2d],
                    kernel_size=[3,3], stride=[2,2], padding='SAME',
                    data_format=self.data_format):
         with tf.variable_scope('Layer1'): 
             conv=layers.conv2d(_inputs, num_outputs=64, kernel_size=3)
             actv=tf.nn.relu(layers.batch_norm(conv))
         with tf.variable_scope('Layer2'): 
             conv=layers.conv2d(actv)
             actv=tf.nn.relu(layers.batch_norm(conv))
         with tf.variable_scope('Layer3'): 
             conv1=layers.conv2d(actv)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1)
             bn2=layers.batch_norm(conv2)
             res= tf.add(actv, bn2)
         with tf.variable_scope('Layer4'): 
             conv1=layers.conv2d(res)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1)
             bn2=layers.batch_norm(conv2)
             res= tf.add(res, bn2)
         with tf.variable_scope('Layer5'): 
             conv1=layers.conv2d(res)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1)
             bn=layers.batch_norm(conv2)
             res= tf.add(res, bn)
         with tf.variable_scope('Layer6'): 
             conv1=layers.conv2d(res)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1)
             bn=layers.batch_norm(conv2)
             res= tf.add(res, bn)
         with tf.variable_scope('Layer7'): 
             conv1=layers.conv2d(res)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1)
             bn=layers.batch_norm(conv2)
             res= tf.add(res, bn)
         with tf.variable_scope('Layer8'): 
             convs = layers.conv2d(res, kernel_size=1, stride=2)
             convs = layers.batch_norm(convs)
             conv1=layers.conv2d(res)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1)
             bn=layers.batch_norm(conv2)
             pool = layers.avg_pool2d(bn)
             res= tf.add(convs, pool)
         with tf.variable_scope('Layer9'):  
             convs = layers.conv2d(res, num_outputs=64, kernel_size=1, stride=2)
             convs = layers.batch_norm(convs)
             conv1=layers.conv2d(res, num_outputs=64)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1, num_outputs=64)
             bn=layers.batch_norm(conv2)
             pool = layers.avg_pool2d(bn)
             res= tf.add(convs, pool)
         with tf.variable_scope('Layer10'): 
             convs = layers.conv2d(res, num_outputs=128, kernel_size=1, stride=2)
             convs = layers.batch_norm(convs)
             conv1=layers.conv2d(res, num_outputs=128)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1, num_outputs=128)
             bn=layers.batch_norm(conv2)
             pool = layers.avg_pool2d(bn)
             res= tf.add(convs, pool)
         with tf.variable_scope('Layer11'): 
             convs = layers.conv2d(res, num_outputs=256, kernel_size=1, stride=2)
             convs = layers.batch_norm(convs)
             conv1=layers.conv2d(res, num_outputs=256)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1, num_outputs=256)
             bn=layers.batch_norm(conv2)
             pool = layers.avg_pool2d(bn)
             res= tf.add(convs, pool)
         with tf.variable_scope('Layer12'): 
             conv1=layers.conv2d(res, num_outputs=512)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1, num_outputs=512)
             bn=layers.batch_norm(conv2)
             avgp = tf.reduce_mean(bn, reduction_axis,  keepdims=True )
     ip=layers.fully_connected(layers.flatten(avgp), num_outputs=2,
                 activation_fn=None, normalizer_fn=None,
                 weights_initializer=tf.random_normal_initializer(mean=0., stddev=0.01), 
                 biases_initializer=tf.constant_initializer(0.), scope='ip')
     self.outputs = ip
     return self.outputs
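Layers 3 through 7 above are identity residual blocks (conv-BN-ReLU-conv-BN added back onto the block input), while Layers 8 through 11 downsample: a strided 1x1 convolution plus batch norm on the shortcut, and average pooling on the main branch before the addition. A compact Keras sketch of one such downsampling block, written against tf.keras rather than the repository's slim/arg_scope style, with the filter count as a parameter:

import tensorflow as tf

def downsample_block(x, filters):
    # Shortcut path: 1x1 conv with stride 2, then batch norm.
    shortcut = tf.keras.layers.Conv2D(filters, 1, strides=2, padding='same')(x)
    shortcut = tf.keras.layers.BatchNormalization()(shortcut)
    # Main path: conv-BN-ReLU-conv-BN, then 3x3 average pooling with stride 2.
    y = tf.keras.layers.Conv2D(filters, 3, padding='same')(x)
    y = tf.keras.layers.BatchNormalization()(y)
    y = tf.keras.layers.Activation('relu')(y)
    y = tf.keras.layers.Conv2D(filters, 3, padding='same')(y)
    y = tf.keras.layers.BatchNormalization()(y)
    y = tf.keras.layers.AveragePooling2D(pool_size=3, strides=2, padding='same')(y)
    return tf.keras.layers.Add()([shortcut, y])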
Code example #8
def dueling_model(img_in,
                  num_actions,
                  scope,
                  noisy=False,
                  reuse=False,
                  concat_softmax=False):
    """As described in https://arxiv.org/abs/1511.06581"""
    with tf.variable_scope(scope, reuse=reuse):
        out = img_in
        with tf.variable_scope("convnet"):
            # original architecture (assumes `layers` is tf.contrib.layers, TF 1.x)
            out = layers.convolution2d(out,
                                       num_outputs=32,
                                       kernel_size=8,
                                       stride=4,
                                       activation_fn=tf.nn.relu)
            out = layers.convolution2d(out,
                                       num_outputs=64,
                                       kernel_size=4,
                                       stride=2,
                                       activation_fn=tf.nn.relu)
            out = layers.convolution2d(out,
                                       num_outputs=64,
                                       kernel_size=3,
                                       stride=1,
                                       activation_fn=tf.nn.relu)
        out = layers.flatten(out)

        with tf.variable_scope("state_value"):
            if noisy:
                # Apply noisy network on fully connected layers
                # ref: https://arxiv.org/abs/1706.10295
                state_hidden = noisy_dense(out,
                                           name='noisy_fc1',
                                           size=512,
                                           activation=tf.nn.relu)
                state_score = noisy_dense(state_hidden,
                                          name='noisy_fc2',
                                          size=1)
            else:
                state_hidden = layers.fully_connected(out,
                                                      num_outputs=512,
                                                      activation_fn=tf.nn.relu)
                state_score = layers.fully_connected(state_hidden,
                                                     num_outputs=1,
                                                     activation_fn=None)
        with tf.variable_scope("action_value"):
            if noisy:
                # Apply noisy network on fully connected layers
                # ref: https://arxiv.org/abs/1706.10295
                actions_hidden = noisy_dense(out,
                                             name='noisy_fc1',
                                             size=512,
                                             activation=tf.nn.relu)
                action_scores = noisy_dense(actions_hidden,
                                            name='noisy_fc2',
                                            size=num_actions)
            else:
                actions_hidden = layers.fully_connected(out,
                                                        num_outputs=512,
                                                        activation_fn=tf.nn.relu)
                action_scores = layers.fully_connected(actions_hidden,
                                                       num_outputs=num_actions,
                                                       activation_fn=None)
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores = action_scores - tf.expand_dims(
                action_scores_mean, 1)

        return state_score + action_scores
Code example #9
File: policies.py  Project: dairatom/stable-baselines
    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 n_env,
                 n_steps,
                 n_batch,
                 reuse=False,
                 layers=None,
                 cnn_extractor=nature_cnn,
                 feature_extraction="cnn",
                 obs_phs=None,
                 layer_norm=False,
                 dueling=True,
                 act_fun=tf.nn.relu,
                 **kwargs):
        super(FeedForwardPolicy,
              self).__init__(sess,
                             ob_space,
                             ac_space,
                             n_env,
                             n_steps,
                             n_batch,
                             dueling=dueling,
                             reuse=reuse,
                             scale=(feature_extraction == "cnn"),
                             obs_phs=obs_phs)

        self._kwargs_check(feature_extraction, kwargs)

        if layers is None:
            layers = [64, 64]

        with tf.variable_scope("model", reuse=reuse):
            with tf.variable_scope("action_value"):
                if feature_extraction == "cnn":
                    extracted_features = cnn_extractor(self.processed_obs,
                                                       **kwargs)
                    action_out = extracted_features
                else:
                    extracted_features = tf.layers.flatten(self.processed_obs)
                    action_out = extracted_features
                    for layer_size in layers:
                        action_out = tf_layers.fully_connected(
                            action_out,
                            num_outputs=layer_size,
                            activation_fn=None)
                        if layer_norm:
                            action_out = tf_layers.layer_norm(action_out,
                                                              center=True,
                                                              scale=True)
                        action_out = act_fun(action_out)

                action_scores = tf_layers.fully_connected(
                    action_out, num_outputs=self.n_actions, activation_fn=None)

            if self.dueling:
                with tf.variable_scope("state_value"):
                    state_out = extracted_features
                    for layer_size in layers:
                        state_out = tf_layers.fully_connected(
                            state_out,
                            num_outputs=layer_size,
                            activation_fn=None)
                        if layer_norm:
                            state_out = tf_layers.layer_norm(state_out,
                                                             center=True,
                                                             scale=True)
                        state_out = act_fun(state_out)
                    state_score = tf_layers.fully_connected(state_out,
                                                            num_outputs=1,
                                                            activation_fn=None)
                action_scores_mean = tf.reduce_mean(action_scores, axis=1)
                action_scores_centered = action_scores - tf.expand_dims(
                    action_scores_mean, axis=1)
                q_out = state_score + action_scores_centered
            else:
                q_out = action_scores

        self.q_values = q_out
        self._setup_init()
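A minimal usage sketch for this policy through stable-baselines' DQN wrapper, assuming stable-baselines 2.x on TensorFlow 1.x and gym; the environment and hyperparameters are illustrative:

import gym
from stable_baselines import DQN

env = gym.make('CartPole-v1')
# MlpPolicy is a FeedForwardPolicy with feature_extraction="mlp";
# policy_kwargs entries are forwarded to the constructor shown above.
model = DQN('MlpPolicy', env, policy_kwargs=dict(dueling=True, layers=[64, 64]), verbose=1)
model.learn(total_timesteps=10000)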