# Example 1
def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope, reuse=False, layer_norm=False):
    """Builds a CNN-to-MLP Q-network, optionally with a dueling head.

    Args:
      convs: list of (num_outputs, kernel_size, stride) triples describing the
        convolutional stack.
      hiddens: list of ints, hidden-layer sizes for the fully connected head(s).
      dueling: bool, if True compute separate state-value and advantage streams
        and combine them into Q-values.
      inpt: `tf.Tensor`, batch of input observations.
      num_actions: int, number of actions (width of the Q-value output).
      scope: str, variable scope wrapping the whole network.
      reuse: bool, whether to reuse variables inside `scope`.
      layer_norm: bool, whether to layer-normalize each hidden layer before ReLU.

    Returns:
      `tf.Tensor` of shape [batch, num_actions] holding Q-values.
    """
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        with tf.variable_scope("convnet"):
            for num_outputs, kernel_size, stride in convs:
                # BUG FIX: `layers.Conv2D` is a Keras class and rejects the
                # `num_outputs`/`stride`/`activation_fn` keywords; the
                # tf.contrib.layers functional API (used elsewhere in this
                # file as `layers.conv2d`) is `layers.convolution2d`.
                out = layers.convolution2d(out,
                                           num_outputs=num_outputs,
                                           kernel_size=kernel_size,
                                           stride=stride,
                                           activation_fn=tf.nn.relu)
        conv_out = layers.flatten(out)
        with tf.variable_scope("action_value"):
            action_out = conv_out
            for hidden in hiddens:
                # Activation is applied manually so layer norm can sit
                # between the linear map and the ReLU.
                action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=None)
                if layer_norm:
                    action_out = layers.layer_norm(action_out, center=True, scale=True)
                action_out = tf.nn.relu(action_out)
            action_scores = layers.fully_connected(action_out, num_outputs=num_actions, activation_fn=None)

        if dueling:
            with tf.variable_scope("state_value"):
                state_out = conv_out
                for hidden in hiddens:
                    state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=None)
                    if layer_norm:
                        state_out = layers.layer_norm(state_out, center=True, scale=True)
                    state_out = tf.nn.relu(state_out)
                state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
            # Dueling combination: Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)).
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
            q_out = state_score + action_scores_centered
        else:
            q_out = action_scores
        return q_out
# Example 2
def rainbow_network(num_actions, num_atoms, support, network_type, state):
    """Convolutional network computing the agent's Q-value distributions.

  Args:
    num_actions: int, number of actions.
    num_atoms: int, the number of buckets of the value function distribution.
    support: tf.linspace, the support of the Q-value distribution.
    network_type: namedtuple, collection of expected values to return.
    state: `tf.Tensor`, contains the agent's current state.

  Returns:
    net: _network_type object containing the tensors output by the network.
  """
    init = layers.variance_scaling_initializer(factor=1.0 / np.sqrt(3.0),
                                               mode='FAN_IN',
                                               uniform=True)

    # Rescale raw pixel observations into [0, 1].
    net = tf.div(tf.cast(state, tf.float32), 255.)
    # Standard Nature-DQN conv stack: (depth, kernel, stride).
    for depth, kernel, stride in ((32, [8, 8], 4),
                                  (64, [4, 4], 2),
                                  (64, [3, 3], 1)):
        net = layers.conv2d(net,
                            depth,
                            kernel,
                            stride=stride,
                            weights_initializer=init)
    net = layers.flatten(net)
    net = layers.fully_connected(net, 512, weights_initializer=init)
    # One logit per (action, atom) pair; no activation on the output layer.
    net = layers.fully_connected(net,
                                 num_actions * num_atoms,
                                 activation_fn=None,
                                 weights_initializer=init)

    logits = tf.reshape(net, [-1, num_actions, num_atoms])
    probabilities = layers.softmax(logits)
    # Expected value of each action's distribution over the fixed support.
    q_values = tf.reduce_sum(support * probabilities, axis=2)
    return network_type(q_values, logits, probabilities)
# Example 3
def nature_dqn_network(num_actions, network_type, state):
    """Convolutional network computing the agent's Q-values (Nature DQN).

  Args:
    num_actions: int, number of actions.
    network_type: namedtuple, collection of expected values to return.
    state: `tf.Tensor`, contains the agent's current state.

  Returns:
    net: _network_type object containing the tensors output by the network.
  """
    # Rescale raw pixel observations into [0, 1].
    net = tf.div(tf.cast(state, tf.float32), 255.)
    # Standard Nature-DQN conv stack: (depth, kernel, stride).
    for depth, kernel, stride in ((32, [8, 8], 4),
                                  (64, [4, 4], 2),
                                  (64, [3, 3], 1)):
        net = layers.conv2d(net, depth, kernel, stride=stride)
    net = layers.flatten(net)
    net = layers.fully_connected(net, 512)
    # Linear output layer: one Q-value per action.
    q_values = layers.fully_connected(net, num_actions, activation_fn=None)
    return network_type(q_values)
# Example 4
def implicit_quantile_network(num_actions, quantile_embedding_dim,
                              network_type, state, num_quantiles):
    """The Implicit Quantile ConvNet.

  Args:
    num_actions: int, number of actions.
    quantile_embedding_dim: int, embedding dimension for the quantile input.
    network_type: namedtuple, collection of expected values to return.
    state: `tf.Tensor`, contains the agent's current state.
    num_quantiles: int, number of quantile inputs.

  Returns:
    net: _network_type object containing the tensors output by the network.
  """
    weights_initializer = layers.variance_scaling_initializer(factor=1.0 /
                                                              np.sqrt(3.0),
                                                              mode='FAN_IN',
                                                              uniform=True)

    # Rescale raw pixel observations into [0, 1] before the conv stack.
    state_net = tf.cast(state, tf.float32)
    state_net = tf.div(state_net, 255.)
    state_net = layers.conv2d(state_net,
                              32, [8, 8],
                              stride=4,
                              weights_initializer=weights_initializer)
    state_net = layers.conv2d(state_net,
                              64, [4, 4],
                              stride=2,
                              weights_initializer=weights_initializer)
    state_net = layers.conv2d(state_net,
                              64, [3, 3],
                              stride=1,
                              weights_initializer=weights_initializer)
    state_net = layers.flatten(state_net)
    state_net_size = state_net.get_shape().as_list()[-1]
    # Replicate the state embedding once per sampled quantile so it can be
    # combined elementwise with the quantile embeddings below.
    state_net_tiled = tf.tile(state_net, [num_quantiles, 1])

    # NOTE(review): requires a statically known batch dimension — get_shape()
    # returns None for a dynamic batch size; confirm against callers.
    batch_size = state_net.get_shape().as_list()[0]
    quantiles_shape = [num_quantiles * batch_size, 1]
    # Sample quantile fractions tau ~ U(0, 1), one per (quantile, batch) pair.
    quantiles = tf.random_uniform(quantiles_shape,
                                  minval=0,
                                  maxval=1,
                                  dtype=tf.float32)

    # Cosine embedding of the quantile fractions: cos(i * pi * tau) for
    # i = 1..quantile_embedding_dim (IQN-style quantile embedding).
    quantile_net = tf.tile(quantiles, [1, quantile_embedding_dim])
    pi = tf.constant(math.pi)
    quantile_net = tf.cast(tf.range(1, quantile_embedding_dim + 1, 1),
                           tf.float32) * pi * quantile_net
    quantile_net = tf.cos(quantile_net)
    # Project the embedding to the state feature size so the two can be
    # multiplied elementwise.
    quantile_net = layers.fully_connected(
        quantile_net, state_net_size, weights_initializer=weights_initializer)
    # Hadamard product.
    net = tf.multiply(state_net_tiled, quantile_net)

    net = layers.fully_connected(net,
                                 512,
                                 weights_initializer=weights_initializer)
    # Linear head: one quantile value per action for each sampled tau.
    quantile_values = layers.fully_connected(
        net,
        num_actions,
        activation_fn=None,
        weights_initializer=weights_initializer)

    return network_type(quantile_values=quantile_values, quantiles=quantiles)
# Example 5
 def _build_model(self, inputs):
     """Builds a 12-layer residual CNN over `inputs` and stores the 2-way
     linear output in `self.outputs`.

     Args:
       inputs: image batch tensor; cast to float32 and transposed to NCHW
         when `self.data_format == 'NCHW'`.

     Returns:
       `self.outputs`: `tf.Tensor` of shape [batch, 2] (unnormalized logits).
     """
     self.inputs = inputs
     # Pick the spatial axes for the final global average pool based on the
     # data format, and move channels first when running in NCHW.
     if self.data_format == 'NCHW':
         reduction_axis = [2,3]
         _inputs = tf.cast(tf.transpose(inputs, [0, 3, 1, 2]), tf.float32)
     else:
         reduction_axis = [1,2]
         _inputs = tf.cast(inputs, tf.float32)
     # Shared defaults for every conv / batch-norm / avg-pool call below:
     # 16-channel 3x3 stride-1 convs with no activation (ReLU is applied
     # explicitly after batch norm), L2 weight decay, and 3x3/2 avg pools.
     with arg_scope([layers.conv2d], num_outputs=16,
                    kernel_size=3, stride=1, padding='SAME',
                    data_format=self.data_format,
                    activation_fn=None,
                    weights_initializer=layers.variance_scaling_initializer(),
                    weights_regularizer=layers.l2_regularizer(2e-4),
                    biases_initializer=tf.constant_initializer(0.2),
                    biases_regularizer=None),\
         arg_scope([layers.batch_norm],
                    decay=0.9, center=True, scale=True, 
                    updates_collections=None, is_training=self.is_training,
                    fused=True, data_format=self.data_format),\
         arg_scope([layers.avg_pool2d],
                    kernel_size=[3,3], stride=[2,2], padding='SAME',
                    data_format=self.data_format):
         # Layers 1-2: plain conv -> BN -> ReLU stem.
         with tf.variable_scope('Layer1'): 
             conv=layers.conv2d(_inputs, num_outputs=64, kernel_size=3)
             actv=tf.nn.relu(layers.batch_norm(conv))
         with tf.variable_scope('Layer2'): 
             conv=layers.conv2d(actv)
             actv=tf.nn.relu(layers.batch_norm(conv))
         # Layers 3-7: identity residual blocks
         # (conv -> BN -> ReLU -> conv -> BN, added to the block input).
         with tf.variable_scope('Layer3'): 
             conv1=layers.conv2d(actv)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1)
             bn2=layers.batch_norm(conv2)
             res= tf.add(actv, bn2)
         with tf.variable_scope('Layer4'): 
             conv1=layers.conv2d(res)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1)
             bn2=layers.batch_norm(conv2)
             res= tf.add(res, bn2)
         with tf.variable_scope('Layer5'): 
             conv1=layers.conv2d(res)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1)
             bn=layers.batch_norm(conv2)
             res= tf.add(res, bn)
         with tf.variable_scope('Layer6'): 
             conv1=layers.conv2d(res)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1)
             bn=layers.batch_norm(conv2)
             res= tf.add(res, bn)
         with tf.variable_scope('Layer7'): 
             conv1=layers.conv2d(res)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1)
             bn=layers.batch_norm(conv2)
             res= tf.add(res, bn)
         # Layers 8-11: downsampling residual blocks. The shortcut is a
         # strided 1x1 conv + BN; the main path ends in an avg pool so both
         # branches halve the spatial resolution before the add. Channel
         # width doubles from Layer 10 onward (16 -> 64 -> 128 -> 256).
         with tf.variable_scope('Layer8'): 
             convs = layers.conv2d(res, kernel_size=1, stride=2)
             convs = layers.batch_norm(convs)
             conv1=layers.conv2d(res)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1)
             bn=layers.batch_norm(conv2)
             pool = layers.avg_pool2d(bn)
             res= tf.add(convs, pool)
         with tf.variable_scope('Layer9'):  
             convs = layers.conv2d(res, num_outputs=64, kernel_size=1, stride=2)
             convs = layers.batch_norm(convs)
             conv1=layers.conv2d(res, num_outputs=64)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1, num_outputs=64)
             bn=layers.batch_norm(conv2)
             pool = layers.avg_pool2d(bn)
             res= tf.add(convs, pool)
         with tf.variable_scope('Layer10'): 
             convs = layers.conv2d(res, num_outputs=128, kernel_size=1, stride=2)
             convs = layers.batch_norm(convs)
             conv1=layers.conv2d(res, num_outputs=128)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1, num_outputs=128)
             bn=layers.batch_norm(conv2)
             pool = layers.avg_pool2d(bn)
             res= tf.add(convs, pool)
         with tf.variable_scope('Layer11'): 
             convs = layers.conv2d(res, num_outputs=256, kernel_size=1, stride=2)
             convs = layers.batch_norm(convs)
             conv1=layers.conv2d(res, num_outputs=256)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1, num_outputs=256)
             bn=layers.batch_norm(conv2)
             pool = layers.avg_pool2d(bn)
             res= tf.add(convs, pool)
         # Layer 12: final conv block followed by global average pooling
         # over the spatial axes chosen above.
         with tf.variable_scope('Layer12'): 
             conv1=layers.conv2d(res, num_outputs=512)
             actv1=tf.nn.relu(layers.batch_norm(conv1))
             conv2=layers.conv2d(actv1, num_outputs=512)
             bn=layers.batch_norm(conv2)
             avgp = tf.reduce_mean(bn, reduction_axis,  keepdims=True )
     # Linear 2-way classifier head (logits, no activation).
     ip=layers.fully_connected(layers.flatten(avgp), num_outputs=2,
                 activation_fn=None, normalizer_fn=None,
                 weights_initializer=tf.random_normal_initializer(mean=0., stddev=0.01), 
                 biases_initializer=tf.constant_initializer(0.), scope='ip')
     self.outputs = ip
     return self.outputs
# Example 6

# Simple Keras CNN classifier: four Conv2D/MaxPooling2D stages followed by a
# small dense head with dropout and a 10-way softmax output for
# 128x128 single-channel images.
#
# BUG FIXES: `Sequential` must be instantiated (`keras.models.Sequential()`);
# layer classes are CamelCase (`Conv2D`, `MaxPooling2D`, `Flatten`, `Dense`,
# `Dropout`); `Conv2D` takes the filter count and kernel size as separate
# arguments (`32(3,3)` called the int 32); the keyword is `input_shape`, not
# `input_size`; the first `add` call was missing a closing parenthesis; and
# `Droupout` was a typo for `layers.Dropout`.
model = keras.models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 1)))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Flatten())

model.add(layers.Dense(60, activation='relu'))
model.add(layers.Dropout(0.20))
model.add(layers.Dense(10, activation='softmax'))


# In[ ]:


# Print a summary of the layer shapes and parameter counts.
model.summary()


# In[ ]: