예제 #1
0
def rainbow_network(num_actions, num_atoms, support, network_type, state):
    """Builds the Rainbow convnet producing per-action return distributions.

    The state is scaled to [0, 1], pushed through the standard three-layer
    Nature-DQN conv stack, then projected to `num_actions * num_atoms` logits
    that parameterize one categorical distribution per action.

    Args:
      num_actions: int, number of actions.
      num_atoms: int, number of buckets of the value function distribution.
      support: tf.linspace, the support of the Q-value distribution.
      network_type: namedtuple, collection of expected values to return.
      state: `tf.Tensor`, contains the agent's current state.

    Returns:
      net: _network_type object containing the tensors output by the network.
    """
    init = layers.variance_scaling_initializer(
        factor=1.0 / np.sqrt(3.0), mode='FAN_IN', uniform=True)

    # Normalize pixel values to [0, 1].
    features = tf.div(tf.cast(state, tf.float32), 255.)
    # Nature-DQN conv torso: (depth, kernel, stride) per layer.
    for depth, kernel, stride in ((32, [8, 8], 4),
                                  (64, [4, 4], 2),
                                  (64, [3, 3], 1)):
        features = layers.conv2d(features, depth, kernel, stride=stride,
                                 weights_initializer=init)
    features = layers.flatten(features)
    features = layers.fully_connected(features, 512, weights_initializer=init)
    features = layers.fully_connected(features,
                                      num_actions * num_atoms,
                                      activation_fn=None,
                                      weights_initializer=init)

    # One categorical distribution over `num_atoms` buckets per action.
    logits = tf.reshape(features, [-1, num_actions, num_atoms])
    probabilities = layers.softmax(logits)
    # Expected return per action = sum over atoms of support * probability.
    q_values = tf.reduce_sum(support * probabilities, axis=2)
    return network_type(q_values, logits, probabilities)
예제 #2
0
def nature_dqn_network(num_actions, network_type, state):
    """Builds the Nature-DQN convnet that maps a state to per-action Q-values.

    Args:
      num_actions: int, number of actions.
      network_type: namedtuple, collection of expected values to return.
      state: `tf.Tensor`, contains the agent's current state.

    Returns:
      net: _network_type object containing the tensors output by the network.
    """
    # Normalize pixel values to [0, 1] before the conv stack.
    hidden = tf.div(tf.cast(state, tf.float32), 255.)
    hidden = layers.conv2d(hidden, 32, [8, 8], stride=4)
    hidden = layers.conv2d(hidden, 64, [4, 4], stride=2)
    hidden = layers.conv2d(hidden, 64, [3, 3], stride=1)
    hidden = layers.fully_connected(layers.flatten(hidden), 512)
    # Linear head: one Q-value per action.
    q_values = layers.fully_connected(hidden, num_actions, activation_fn=None)
    return network_type(q_values)
예제 #3
0
 def __init__(self,
              in_channels=1,
              out_channels=256,
              kernel_size=9,
              strides=1,
              padding='valid'):
     """Configure a convolution followed by a ReLU activation.

     Args:
       in_channels: int, number of input channels (default 1).
       out_channels: int, number of convolution filters (default 256).
       kernel_size: int, square kernel side length (default 9).
       strides: int, convolution stride (default 1).
       padding: str, padding mode passed to the conv layer (default 'valid').
     """
     super(Convolution, self).__init__()
     # NOTE(review): `layers.conv2d` in the tf.layers functional API expects a
     # tensor as `inputs`; passing the integer `in_channels` here looks wrong.
     # A layer *class* (e.g. tf.keras.layers.Conv2D) was probably intended —
     # confirm against the module's imports.
     self.conv = layers.conv2d(inputs=in_channels,
                               filters=out_channels,
                               kernel_size=kernel_size,
                               strides=strides,
                               padding=padding)
     # NOTE(review): `nn.relu` is normally a function, not a class to
     # instantiate; verify that `nn` here actually provides a callable
     # ReLU layer object.
     self.relu = nn.relu()
예제 #4
0
def implicit_quantile_network(num_actions, quantile_embedding_dim,
                              network_type, state, num_quantiles):
    """Builds the Implicit Quantile Network (IQN) ConvNet.

    Encodes the state with the Nature-DQN conv torso, samples
    `num_quantiles` quantile fractions uniformly, embeds them with a cosine
    basis, and fuses embedding and state features by elementwise product
    before the quantile-value head.

    Args:
      num_actions: int, number of actions.
      quantile_embedding_dim: int, embedding dimension for the quantile input.
      network_type: namedtuple, collection of expected values to return.
      state: `tf.Tensor`, contains the agent's current state.
      num_quantiles: int, number of quantile inputs.

    Returns:
      net: _network_type object containing the tensors output by the network.
    """
    init = layers.variance_scaling_initializer(
        factor=1.0 / np.sqrt(3.0), mode='FAN_IN', uniform=True)

    # State torso: pixels scaled to [0, 1], then the standard conv stack.
    torso = tf.div(tf.cast(state, tf.float32), 255.)
    torso = layers.conv2d(torso, 32, [8, 8], stride=4,
                          weights_initializer=init)
    torso = layers.conv2d(torso, 64, [4, 4], stride=2,
                          weights_initializer=init)
    torso = layers.conv2d(torso, 64, [3, 3], stride=1,
                          weights_initializer=init)
    torso = layers.flatten(torso)
    feature_dim = torso.get_shape().as_list()[-1]
    # Repeat the state features once per sampled quantile.
    torso_tiled = tf.tile(torso, [num_quantiles, 1])

    batch_size = torso.get_shape().as_list()[0]
    # One uniform quantile fraction per (quantile, batch element) pair.
    quantiles = tf.random_uniform([num_quantiles * batch_size, 1],
                                  minval=0,
                                  maxval=1,
                                  dtype=tf.float32)

    # Cosine embedding: cos(i * pi * tau) for i in 1..quantile_embedding_dim.
    embedding = tf.tile(quantiles, [1, quantile_embedding_dim])
    pi = tf.constant(math.pi)
    embedding = tf.cast(tf.range(1, quantile_embedding_dim + 1, 1),
                        tf.float32) * pi * embedding
    embedding = tf.cos(embedding)
    embedding = layers.fully_connected(
        embedding, feature_dim, weights_initializer=init)

    # Fuse state features and quantile embedding (Hadamard product).
    merged = tf.multiply(torso_tiled, embedding)

    merged = layers.fully_connected(merged, 512, weights_initializer=init)
    quantile_values = layers.fully_connected(
        merged,
        num_actions,
        activation_fn=None,
        weights_initializer=init)

    return network_type(quantile_values=quantile_values, quantiles=quantiles)
예제 #5
0
파일: models.py 프로젝트: deepsteg/deepsteg
 def _build_model(self, inputs):
     """Build the residual steganalysis CNN and return its 2-class logits.

     Args:
       inputs: image batch tensor, cast to float32. Treated as NHWC and
         transposed when self.data_format == 'NCHW' — assumes the incoming
         layout is NHWC; TODO confirm against callers.

     Returns:
       `tf.Tensor`, logits of the final 2-unit fully connected layer ('ip'),
       also stored on self.outputs.
     """
     self.inputs = inputs
     # Choose spatial axes for the final global average depending on layout.
     if self.data_format == 'NCHW':
         reduction_axis = [2,3]
         _inputs = tf.cast(tf.transpose(inputs, [0, 3, 1, 2]), tf.float32)
     else:
         reduction_axis = [1,2]
         _inputs = tf.cast(inputs, tf.float32)
     # Shared defaults: 16-filter 3x3 stride-1 convs (no activation; BN+ReLU
     # applied explicitly), batch norm tied to self.is_training, and 3x3
     # stride-2 average pooling for downsampling shortcuts.
     with arg_scope([layers.conv2d], num_outputs=16,
                    kernel_size=3, stride=1, padding='SAME',
                    data_format=self.data_format,
                    activation_fn=None,
                    weights_initializer=layers.variance_scaling_initializer(),
                    weights_regularizer=layers.l2_regularizer(2e-4),
                    biases_initializer=tf.constant_initializer(0.2),
                    biases_regularizer=None),\
         arg_scope([layers.batch_norm],
                    decay=0.9, center=True, scale=True, 
                    updates_collections=None, is_training=self.is_training,
                    fused=True, data_format=self.data_format),\
         arg_scope([layers.avg_pool2d],
                    kernel_size=[3,3], stride=[2,2], padding='SAME',
                    data_format=self.data_format):
         # Stem: two conv -> BN -> ReLU stages (Layer1 widens to 64 filters).
         with tf.variable_scope('Layer1'): 
             conv=layers.conv2d(_inputs, num_outputs=64, kernel_size=3)
             actv=tf.nn.relu(layers.batch_norm(conv))
         with tf.variable_scope('Layer2'): 
             conv=layers.conv2d(actv)
             actv=tf.nn.relu(layers.batch_norm(conv))
         # Layers 3-7: identity residual blocks (conv-BN-ReLU-conv-BN + skip).
         with tf.variable_scope('Layer3'): 
             conv1=layers.conv2d(actv)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1)
             bn2=layers.batch_norm(conv2)
             res= tf.add(actv, bn2)
         with tf.variable_scope('Layer4'): 
             conv1=layers.conv2d(res)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1)
             bn2=layers.batch_norm(conv2)
             res= tf.add(res, bn2)
         with tf.variable_scope('Layer5'): 
             conv1=layers.conv2d(res)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1)
             bn=layers.batch_norm(conv2)
             res= tf.add(res, bn)
         with tf.variable_scope('Layer6'): 
             conv1=layers.conv2d(res)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1)
             bn=layers.batch_norm(conv2)
             res= tf.add(res, bn)
         with tf.variable_scope('Layer7'): 
             conv1=layers.conv2d(res)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1)
             bn=layers.batch_norm(conv2)
             res= tf.add(res, bn)
         # Layers 8-11: downsampling residual blocks — the shortcut is a
         # strided 1x1 conv + BN, the main path ends in avg-pooling so both
         # branches halve the spatial size; filters double each block.
         with tf.variable_scope('Layer8'): 
             convs = layers.conv2d(res, kernel_size=1, stride=2)
             convs = layers.batch_norm(convs)
             conv1=layers.conv2d(res)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1)
             bn=layers.batch_norm(conv2)
             pool = layers.avg_pool2d(bn)
             res= tf.add(convs, pool)
         with tf.variable_scope('Layer9'):  
             convs = layers.conv2d(res, num_outputs=64, kernel_size=1, stride=2)
             convs = layers.batch_norm(convs)
             conv1=layers.conv2d(res, num_outputs=64)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1, num_outputs=64)
             bn=layers.batch_norm(conv2)
             pool = layers.avg_pool2d(bn)
             res= tf.add(convs, pool)
         with tf.variable_scope('Layer10'): 
             convs = layers.conv2d(res, num_outputs=128, kernel_size=1, stride=2)
             convs = layers.batch_norm(convs)
             conv1=layers.conv2d(res, num_outputs=128)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1, num_outputs=128)
             bn=layers.batch_norm(conv2)
             pool = layers.avg_pool2d(bn)
             res= tf.add(convs, pool)
         with tf.variable_scope('Layer11'): 
             convs = layers.conv2d(res, num_outputs=256, kernel_size=1, stride=2)
             convs = layers.batch_norm(convs)
             conv1=layers.conv2d(res, num_outputs=256)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1, num_outputs=256)
             bn=layers.batch_norm(conv2)
             pool = layers.avg_pool2d(bn)
             res= tf.add(convs, pool)
         # Layer 12: final conv block (no skip) then global average pooling
         # over the spatial axes chosen above.
         with tf.variable_scope('Layer12'): 
             conv1=layers.conv2d(res, num_outputs=512)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1, num_outputs=512)
             bn=layers.batch_norm(conv2)
             avgp = tf.reduce_mean(bn, reduction_axis,  keepdims=True )
     # Classifier head: 2-way (cover vs. stego, presumably — confirm) linear
     # layer on the pooled features.
     ip=layers.fully_connected(layers.flatten(avgp), num_outputs=2,
                 activation_fn=None, normalizer_fn=None,
                 weights_initializer=tf.random_normal_initializer(mean=0., stddev=0.01), 
                 biases_initializer=tf.constant_initializer(0.), scope='ip')
     self.outputs = ip
     return self.outputs