Example No. 1
def get_variable_initializer(hparams):
  """Get variable initializer from hparams."""
  if not hparams.initializer:
    return None

  mlperf_log.transformer_print(key=mlperf_log.MODEL_HP_INITIALIZER_GAIN,
                               value=hparams.initializer_gain,
                               hparams=hparams)

  if not tf.contrib.eager.in_eager_mode():
    tf.logging.info("Using variable initializer: %s", hparams.initializer)
  if hparams.initializer == "orthogonal":
    return tf.orthogonal_initializer(gain=hparams.initializer_gain)
  elif hparams.initializer == "uniform":
    max_val = 0.1 * hparams.initializer_gain
    return tf.random_uniform_initializer(-max_val, max_val)
  elif hparams.initializer == "normal_unit_scaling":
    return tf.variance_scaling_initializer(
        hparams.initializer_gain, mode="fan_avg", distribution="normal")
  elif hparams.initializer == "uniform_unit_scaling":
    return tf.variance_scaling_initializer(
        hparams.initializer_gain, mode="fan_avg", distribution="uniform")
  elif hparams.initializer == "xavier":
    return tf.contrib.layers.xavier_initializer()
  else:
    raise ValueError("Unrecognized initializer: %s" % hparams.initializer)
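A minimal usage sketch (assuming the tensor2tensor-style environment above, where mlperf_log is importable and hparams is a tf.contrib.training.HParams carrying the two fields this function reads):

hparams = tf.contrib.training.HParams(initializer="uniform_unit_scaling",
                                      initializer_gain=1.0)
init = get_variable_initializer(hparams)
# Variables created under this scope default to the chosen initializer.
with tf.variable_scope("body", initializer=init):
    w = tf.get_variable("w", shape=[512, 512])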
Example No. 2
def q_network(X_state, name):
    inputs = X_state
    with tf.variable_scope(name) as scope:
        dense_outputs = tf.layers.dense(inputs, 100, tf.nn.relu, kernel_initializer=tf.variance_scaling_initializer())
        outputs = tf.layers.dense(dense_outputs, n_outputs, kernel_initializer=tf.variance_scaling_initializer())
    trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name)
    trainable_vars_by_name = {var.name[len(scope.name):]: var for var in trainable_vars}
    return outputs, trainable_vars_by_name
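The name-keyed variable dict returned here is what makes target-network synchronization cheap in DQN. A hedged sketch of how the two copies might be wired together, reusing the X_state placeholder and n_outputs global assumed by the example:

online_q, online_vars = q_network(X_state, name="q_networks/online")
target_q, target_vars = q_network(X_state, name="q_networks/target")

# Match variables by their name suffix (the part after the scope prefix)
# and copy the online weights into the target network in one grouped op.
copy_ops = [target_var.assign(online_vars[var_name])
            for var_name, target_var in target_vars.items()]
copy_online_to_target = tf.group(*copy_ops)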
Example No. 3
def _get_variable_initializer(hparams):
  if hparams.initializer == "orthogonal":
    return tf.orthogonal_initializer(gain=hparams.initializer_gain)
  elif hparams.initializer == "uniform":
    max_val = 0.1 * hparams.initializer_gain
    return tf.random_uniform_initializer(-max_val, max_val)
  elif hparams.initializer == "normal_unit_scaling":
    return tf.variance_scaling_initializer(
        hparams.initializer_gain, mode="fan_avg", distribution="normal")
  elif hparams.initializer == "uniform_unit_scaling":
    return tf.variance_scaling_initializer(
        hparams.initializer_gain, mode="fan_avg", distribution="uniform")
  else:
    raise ValueError("Unrecognized initializer: %s" % hparams.initializer)
Example No. 4
@contextmanager  # from contextlib: the function body below yields
def backbone_scope(freeze):
    """
    Args:
        freeze (bool): whether to freeze all the variables under the scope
    """
    def nonlin(x):
        x = get_norm()(x)
        return tf.nn.relu(x)

    with argscope([Conv2D, MaxPooling, BatchNorm], data_format='channels_first'), \
            argscope(Conv2D, use_bias=False, activation=nonlin,
                     kernel_initializer=tf.variance_scaling_initializer(
                         scale=2.0, mode='fan_out')), \
            ExitStack() as stack:
        if cfg.BACKBONE.NORM in ['FreezeBN', 'SyncBN']:
            if freeze or cfg.BACKBONE.NORM == 'FreezeBN':
                stack.enter_context(argscope(BatchNorm, training=False))
            else:
                stack.enter_context(argscope(
                    BatchNorm, sync_statistics='nccl' if cfg.TRAINER == 'replicated' else 'horovod'))

        if freeze:
            stack.enter_context(freeze_variables(stop_gradient=False, skip_collection=True))
        else:
            # the layers are not completely frozen, but we may want to freeze only the affine params
            if cfg.BACKBONE.FREEZE_AFFINE:
                stack.enter_context(custom_getter_scope(freeze_affine_getter))
        yield
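Because the body yields, backbone_scope only works as a context manager (hence the contextmanager decorator restored above). A hypothetical call site, assuming image is an NCHW tensor:

with backbone_scope(freeze=True):
    # Conv2D/BatchNorm created here pick up the argscope defaults,
    # e.g. a hypothetical ResNet stem:
    l = Conv2D('conv0', image, 64, 7, strides=2)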
Example No. 5
    def __init__(self,
                 name: str,
                 n_heads: int,
                 keys_encoder: Attendable,
                 values_encoder: Attendable = None,
                 dropout_keep_prob: float = 1.0,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        BaseAttention.__init__(self, name, reuse, save_checkpoint,
                               load_checkpoint, initializers)

        self.n_heads = n_heads
        self.dropout_keep_prob = dropout_keep_prob

        self.keys_encoder = keys_encoder

        if values_encoder is not None:
            self.values_encoder = values_encoder
        else:
            self.values_encoder = self.keys_encoder

        if self.n_heads <= 0:
            raise ValueError("Number of heads must be greater than zero.")

        if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
            raise ValueError("Dropout keep prob must be inside (0,1].")

        self._variable_scope.set_initializer(tf.variance_scaling_initializer(
            mode="fan_avg", distribution="uniform"))
Example No. 6
    def get_logits(self, image):
        gauss_init = tf.random_normal_initializer(stddev=0.01)
        with argscope(Conv2D,
                      kernel_initializer=tf.variance_scaling_initializer(scale=2.)), \
                argscope([Conv2D, FullyConnected], activation=tf.nn.relu), \
                argscope([Conv2D, MaxPooling], data_format='channels_last'):
            # necessary padding to get 55x55 after conv1
            image = tf.pad(image, [[0, 0], [2, 2], [2, 2], [0, 0]])
            l = Conv2D('conv1', image, filters=96, kernel_size=11, strides=4, padding='VALID')
            # size: 55
            visualize_conv1_weights(l.variables.W)
            l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm1')
            l = MaxPooling('pool1', l, 3, strides=2, padding='VALID')
            # 27
            l = Conv2D('conv2', l, filters=256, kernel_size=5, split=2)
            l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm2')
            l = MaxPooling('pool2', l, 3, strides=2, padding='VALID')
            # 13
            l = Conv2D('conv3', l, filters=384, kernel_size=3)
            l = Conv2D('conv4', l, filters=384, kernel_size=3, split=2)
            l = Conv2D('conv5', l, filters=256, kernel_size=3, split=2)
            l = MaxPooling('pool3', l, 3, strides=2, padding='VALID')

            l = FullyConnected('fc6', l, 4096,
                               kernel_initializer=gauss_init,
                               bias_initializer=tf.ones_initializer())
            l = Dropout(l, rate=0.5)
            l = FullyConnected('fc7', l, 4096, kernel_initializer=gauss_init)
            l = Dropout(l, rate=0.5)
        logits = FullyConnected('fc8', l, 1000, kernel_initializer=gauss_init)
        return logits
Example No. 7
    def build_graph(self, image, label):
        image = image_preprocess(image, bgr=True)
        image = tf.transpose(image, [0, 3, 1, 2])

        cfg = {
            18: ([2, 2, 2, 2], preresnet_basicblock),
            34: ([3, 4, 6, 3], preresnet_basicblock),
        }
        defs, block_func = cfg[DEPTH]

        with argscope(Conv2D, use_bias=False,
                      kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')), \
                argscope([Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm], data_format='channels_first'):
            convmaps = (LinearWrap(image)
                        .Conv2D('conv0', 64, 7, strides=2, activation=BNReLU)
                        .MaxPooling('pool0', 3, strides=2, padding='SAME')
                        .apply2(preresnet_group, 'group0', block_func, 64, defs[0], 1)
                        .apply2(preresnet_group, 'group1', block_func, 128, defs[1], 2)
                        .apply2(preresnet_group, 'group2', block_func, 256, defs[2], 2)
                        .apply2(preresnet_group, 'group3new', block_func, 512, defs[3], 1)())
            print(convmaps)
            convmaps = GlobalAvgPooling('gap', convmaps)
            logits = FullyConnected('linearnew', convmaps, 1000)

        loss = compute_loss_and_error(logits, label)
        wd_cost = regularize_cost('.*/W', l2_regularizer(1e-4), name='l2_regularize_loss')
        add_moving_summary(loss, wd_cost)
        return tf.add_n([loss, wd_cost], name='cost')
Example No. 8
def additive_attention(a, b, a_lengths, b_lengths, max_seq_len, hidden_units=150,
                       scope='additive-attention', reuse=False):
    """
    For sequences a and b of lengths a_lengths and b_lengths, computes an attention matrix attn,
    where attn(i, j) = dot(v, tanh(W*a_i + W*b_j)).  v is a learnable vector and W is a learnable
    matrix. The rows of attn are softmax normalized.

    Args:
        a: Input sequence a.  Tensor of shape [batch_size, max_seq_len, input_size].
        b: Input sequence b.  Tensor of shape [batch_size, max_seq_len, input_size].
        a_lengths: Lengths of sequences in a.  Tensor of shape [batch_size].
        b_lengths: Lengths of sequences in b.  Tensor of shape [batch_size].
        max_seq_len: Length of padded sequences a and b.  Integer.
        hidden_units: Number of hidden units.  Integer.

    Returns:
        Attention matrix.  Tensor of shape [batch_size, max_seq_len, max_seq_len].

    """
    with tf.variable_scope(scope, reuse=reuse):
        aW = time_distributed_dense_layer(a, hidden_units, bias=False, scope='dense', reuse=False)
        bW = time_distributed_dense_layer(b, hidden_units, bias=False, scope='dense', reuse=True)
        aW = tf.expand_dims(aW, 2)
        bW = tf.expand_dims(bW, 1)
        v = tf.get_variable(
            name='dot_weights',
            initializer=tf.variance_scaling_initializer(),
            shape=[hidden_units]
        )
        logits = tf.einsum('ijkl,l->ijk', tf.nn.tanh(aW + bW), v)
        logits = logits - tf.expand_dims(tf.reduce_max(logits, axis=2), 2)
        attn = tf.exp(logits)
        attn = mask_attention_weights(attn, a_lengths, b_lengths, max_seq_len)
        return attn / tf.expand_dims(tf.reduce_sum(attn, axis=2) + 1e-10, 2)
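mask_attention_weights is assumed but not shown; a plausible sketch that zeroes attention weights beyond each sequence's true length before the final renormalization:

def mask_attention_weights(weights, a_lengths, b_lengths, max_seq_len):
    # weights: [batch_size, max_seq_len, max_seq_len]. Row i is valid only
    # for i < a_lengths; column j only for j < b_lengths.
    a_mask = tf.sequence_mask(a_lengths, max_seq_len)  # [batch_size, max_seq_len]
    b_mask = tf.sequence_mask(b_lengths, max_seq_len)  # [batch_size, max_seq_len]
    mask = tf.logical_and(tf.expand_dims(a_mask, 2), tf.expand_dims(b_mask, 1))
    return weights * tf.cast(mask, tf.float32)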
Example No. 9
def conv2d_fixed_padding(inputs,
                         filters,
                         kernel_size,
                         strides,
                         data_format="channels_first"):
  """Strided 2-D convolution with explicit padding.

  The padding is consistent and is based only on `kernel_size`, not on the
  dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).

  Args:
    inputs: `Tensor` of size `[batch, channels, height_in, width_in]`.
    filters: `int` number of filters in the convolution.
    kernel_size: `int` size of the kernel to be used in the convolution.
    strides: `int` strides of the convolution.
    data_format: `str` either "channels_first" for `[batch, channels, height,
        width]` or "channels_last" for `[batch, height, width, channels]`.

  Returns:
    A `Tensor` of shape `[batch, filters, height_out, width_out]`.
  """
  if strides > 1:
    inputs = fixed_padding(inputs, kernel_size, data_format=data_format)

  return tf.layers.conv2d(
      inputs=inputs,
      filters=filters,
      kernel_size=kernel_size,
      strides=strides,
      padding=("SAME" if strides == 1 else "VALID"),
      use_bias=False,
      kernel_initializer=tf.variance_scaling_initializer(),
      data_format=data_format)
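fixed_padding is referenced but not defined in this snippet; a minimal sketch consistent with the TF reference ResNet, padding by a total of kernel_size - 1 regardless of input size:

def fixed_padding(inputs, kernel_size, data_format="channels_first"):
    # Pad both spatial dimensions so a stride>1 VALID convolution behaves
    # like SAME padding computed from kernel_size alone.
    pad_total = kernel_size - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    if data_format == "channels_first":
        return tf.pad(inputs, [[0, 0], [0, 0],
                               [pad_beg, pad_end], [pad_beg, pad_end]])
    return tf.pad(inputs, [[0, 0], [pad_beg, pad_end],
                           [pad_beg, pad_end], [0, 0]])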
Example No. 10
  def __call__(self, inputs, targets=None):
    """Calculate target logits or inferred target sequences.

    Args:
      inputs: int tensor with shape [batch_size, input_length].
      targets: None or int tensor with shape [batch_size, target_length].

    Returns:
      If targets is defined, then return logits for each word in the target
      sequence: a float tensor with shape [batch_size, target_length, vocab_size].
      If targets is None, then generate the output sequence one token at a time
        and return a dictionary {
          output: [batch_size, decoded_length]
          score: [batch_size, float]}
    """
    # Variance scaling is used here because it seems to work in many problems.
    # Other reasonable initializers may also work just as well.
    initializer = tf.variance_scaling_initializer(
        self.params.initializer_gain, mode="fan_avg", distribution="uniform")
    with tf.variable_scope("Transformer", initializer=initializer):
      # Calculate attention bias for encoder self-attention and decoder
      # multi-headed attention layers.
      attention_bias = model_utils.get_padding_bias(inputs)

      # Run the inputs through the encoder layer to map the symbol
      # representations to continuous representations.
      encoder_outputs = self.encode(inputs, attention_bias)

      # Generate output sequence if targets is None, or return logits if target
      # sequence is known.
      if targets is None:
        return self.predict(encoder_outputs, attention_bias)
      else:
        logits = self.decode(targets, encoder_outputs, attention_bias)
        return logits
Example No. 11
  def _fully_connected(self, x, out_dim):
    """Fully connected layer: He-uniform weights, zero-initialized biases."""
    w = tf.get_variable(
        'DW', [x.get_shape()[1], out_dim],
        initializer=tf.variance_scaling_initializer(distribution='uniform'))
    b = tf.get_variable(
        'biases', [out_dim], initializer=tf.constant_initializer())
    return tf.nn.xw_plus_b(x, w, b)
Example No. 12
    def output(self) -> tf.Tensor:
        pooled_outputs = []
        for filter_size, num_filters in self.filters:
            with tf.variable_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, self.embedding_size, num_filters]
                w_filter = get_variable(
                    "conv_W", filter_shape,
                    initializer=tf.variance_scaling_initializer(
                        mode="fan_avg", distribution="uniform"))
                b_filter = get_variable(
                    "conv_bias", [num_filters],
                    initializer=tf.zeros_initializer())
                conv = tf.nn.conv1d(
                    self.embedded_inputs,
                    w_filter,
                    stride=1,
                    padding="VALID",
                    name="conv")

                # Apply nonlinearity
                conv_relu = tf.nn.relu(tf.nn.bias_add(conv, b_filter))

                # Max-pooling over the outputs
                pooled = tf.reduce_max(conv_relu, 1)
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        return tf.concat(pooled_outputs, axis=1)
Example No. 13
    def build_graph(self, image, label):
        assert tf.test.is_gpu_available()

        MEAN_IMAGE = tf.constant([0.4914, 0.4822, 0.4465], dtype=tf.float32)
        STD_IMAGE = tf.constant([0.2023, 0.1994, 0.2010], dtype=tf.float32)
        image = ((image / 255.0) - MEAN_IMAGE) / STD_IMAGE
        image = tf.transpose(image, [0, 3, 1, 2])

        pytorch_default_init = tf.variance_scaling_initializer(scale=1.0 / 3, mode='fan_in', distribution='uniform')
        with argscope([Conv2D, BatchNorm, GlobalAvgPooling], data_format='channels_first'), \
                argscope(Conv2D, kernel_initializer=pytorch_default_init):
            net = Conv2D('conv0', image, 64, kernel_size=3, strides=1, use_bias=False)
            for i, blocks_in_module in enumerate(MODULE_SIZES):
                for j in range(blocks_in_module):
                    stride = 2 if j == 0 and i > 0 else 1
                    with tf.variable_scope("res%d.%d" % (i, j)):
                        net = preactivation_block(net, FILTER_SIZES[i], stride)
            net = GlobalAvgPooling('gap', net)
            logits = FullyConnected('linear', net, CLASS_NUM,
                                    kernel_initializer=tf.random_normal_initializer(stddev=1e-3))

        ce_cost = tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=logits)
        ce_cost = tf.reduce_mean(ce_cost, name='cross_entropy_loss')

        single_label = tf.to_int32(tf.argmax(label, axis=1))
        wrong = tf.to_float(tf.logical_not(tf.nn.in_top_k(logits, single_label, 1)), name='wrong_vector')
        # monitor training error
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'), ce_cost)
        add_param_summary(('.*/W', ['histogram']))

        # weight decay on all W matrices, including convolutional layers
        wd_cost = tf.multiply(WEIGHT_DECAY, regularize_cost('.*', tf.nn.l2_loss), name='wd_cost')

        return tf.add_n([ce_cost, wd_cost], name='cost')
Example No. 14
def get_tf_initializer(name="glorot"):
    if name == "const":
        return tf.constant_initializer(0.3)
    elif name == "glorot":
        return tf.variance_scaling_initializer(
            scale=1.0, mode="fan_avg", distribution="normal")
    elif name == "normal":
        return tf.truncated_normal_initializer(dtype=tf.float32, stddev=0.36)
Example No. 15
def q_network(state_tensor):
    inputs = state_tensor
    conv_outputs1 = tf.layers.conv2d(inputs, filters=32, kernel_size=(8,8), strides=4, padding='same', activation=tf.nn.relu, kernel_initializer=tf.variance_scaling_initializer())
    conv_outputs2 = tf.layers.conv2d(conv_outputs1, filters=64, kernel_size=(4,4), strides=2, padding='same', activation=tf.nn.relu, kernel_initializer=tf.variance_scaling_initializer())
    conv_outputs3 = tf.layers.conv2d(conv_outputs2, filters=64, kernel_size=(3,3), strides=1, padding='same', activation=tf.nn.relu, kernel_initializer=tf.variance_scaling_initializer())
    flat_outputs = tf.reshape(conv_outputs3, shape=[-1, n_hidden_in])
    dense_outputs = tf.layers.dense(flat_outputs, n_hidden, activation=tf.nn.relu, kernel_initializer=tf.variance_scaling_initializer())
    outputs = tf.layers.dense(dense_outputs, n_outputs, kernel_initializer=tf.variance_scaling_initializer())
    return outputs
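n_hidden_in must equal the flattened size of conv_outputs3. For the classic 84x84 Atari input this architecture suggests, the stride-4, stride-2 and stride-1 'same' convolutions produce 21x21, 11x11 and 11x11 maps, so one consistent (hypothetical) set of globals would be:

n_hidden_in = 11 * 11 * 64  # 7744 for an 84x84 input; recompute for other sizes
n_hidden = 512
n_outputs = 4               # hypothetical: number of discrete actions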
Example No. 16
def Deconv2D(x, out_channel, kernel_shape,
             stride, padding='SAME',
             W_init=None, b_init=None,
             nl=tf.identity, use_bias=True,
             data_format='NHWC'):
    """
    2D deconvolution on 4D inputs.

    Args:
        x (tf.Tensor): a tensor of shape NHWC.
            Must have known number of channels, but can have other unknown dimensions.
        out_channel: number of output channels.
        kernel_shape: (h, w) tuple or an int.
        stride: (h, w) tuple or an int.
        padding (str): 'valid' or 'same'. Case insensitive.
        W_init: initializer for W. Defaults to `tf.variance_scaling_initializer(2.0)`, i.e. kaiming-normal.
        b_init: initializer for b. Defaults to zero.
        nl: a nonlinearity function.
        use_bias (bool): whether to use bias.

    Returns:
        tf.Tensor: an NHWC tensor named ``output`` with attribute `variables`.

    Variable Names:

    * ``W``: weights
    * ``b``: bias
    """
    in_shape = x.get_shape().as_list()
    channel_axis = 3 if data_format == 'NHWC' else 1
    in_channel = in_shape[channel_axis]
    assert in_channel is not None, "[Deconv2D] Input cannot have unknown channel!"

    assert isinstance(out_channel, int), out_channel

    if W_init is None:
        W_init = tf.variance_scaling_initializer(scale=2.0)
    if b_init is None:
        b_init = tf.constant_initializer()

    with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
        layer = tf.layers.Conv2DTranspose(
            out_channel, kernel_shape,
            strides=stride, padding=padding,
            data_format='channels_last' if data_format == 'NHWC' else 'channels_first',
            activation=lambda x: nl(x, name='output'),
            use_bias=use_bias,
            kernel_initializer=W_init,
            bias_initializer=b_init,
            trainable=True)
        ret = layer.apply(x, scope=tf.get_variable_scope())

    ret.variables = VariableHolder(W=layer.kernel)
    if use_bias:
        ret.variables.b = layer.bias
    return ret
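As written, the function is called positionally; in tensorpack the @layer_register decorator (not shown here) would also prepend a name argument. A hypothetical direct call that doubles the spatial resolution of a tensor x:

with tf.variable_scope('upsample'):
    y = Deconv2D(x, out_channel=256, kernel_shape=2, stride=2, nl=tf.nn.relu)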
Example No. 17
def get_variable_initializer(hparams):
  """Get variable initializer from hparams."""
  if not hparams.initializer:
    return None

  tf.logging.info("Using variable initializer: %s", hparams.initializer)
  if hparams.initializer == "orthogonal":
    return tf.orthogonal_initializer(gain=hparams.initializer_gain)
  elif hparams.initializer == "uniform":
    max_val = 0.1 * hparams.initializer_gain
    return tf.random_uniform_initializer(-max_val, max_val)
  elif hparams.initializer == "normal_unit_scaling":
    return tf.variance_scaling_initializer(
        hparams.initializer_gain, mode="fan_avg", distribution="normal")
  elif hparams.initializer == "uniform_unit_scaling":
    return tf.variance_scaling_initializer(
        hparams.initializer_gain, mode="fan_avg", distribution="uniform")
  else:
    raise ValueError("Unrecognized initializer: %s" % hparams.initializer)
Example No. 18
def fastrcnn_Xconv1fc_head(feature, num_classes, num_convs):
    """
    Args:
        feature (any shape):
        num_classes(int): num_category + 1
        num_convs (int): number of conv layers

    Returns:
        cls_logits (N x num_classes), reg_logits (N x (num_classes-1) x 4)
    """
    l = feature
    with argscope(Conv2D, data_format='channels_first',
                  kernel_initializer=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_out', distribution='normal')):
        for k in range(num_convs):
            l = Conv2D('conv{}'.format(k), l, cfg.FPN.FRCNN_CONV_HEAD_DIM, 3, activation=tf.nn.relu)
        l = FullyConnected('fc', l, cfg.FPN.FRCNN_FC_HEAD_DIM,
                           kernel_initializer=tf.variance_scaling_initializer(), activation=tf.nn.relu)
    return fastrcnn_outputs('outputs', l, num_classes)
Example No. 19
 def embedded_inputs(self) -> tf.Tensor:
     with tf.variable_scope("input_projection"):
         embedding_matrix = get_variable(
             "word_embeddings",
             [len(self.vocabulary), self.embedding_size],
             initializer=tf.variance_scaling_initializer(
                 mode="fan_avg", distribution="uniform"))
         return dropout(
             tf.nn.embedding_lookup(embedding_matrix, self.inputs),
             self.dropout_keep_prob,
             self.train_mode)
Example No. 20
def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format):
  """Strided 2-D convolution with explicit padding."""
  # The padding is consistent and is based only on `kernel_size`, not on the
  # dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).
  if strides > 1:
    inputs = fixed_padding(inputs, kernel_size, data_format)

  return tf.layers.conv2d(
      inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides,
      padding=('SAME' if strides == 1 else 'VALID'), use_bias=False,
      kernel_initializer=tf.variance_scaling_initializer(),
      data_format=data_format)
Example No. 21
    def modality_matrix(self) -> tf.Tensor:
        """Create an embedding matrix for varyining target modalities.

        Used to embed different target space modalities in the tensor2tensor
        models (e.g. during the zero-shot translation).
        """
        emb_size = self.input_sequence.temporal_states.shape.as_list()[-1]
        return get_variable(
            name="target_modality_embedding_matrix",
            shape=[32, emb_size],
            dtype=tf.float32,
            initializer=tf.variance_scaling_initializer(
                mode="fan_avg", distribution="uniform"))
Example No. 22
def fastrcnn_2fc_head(feature):
    """
    Args:
        feature (any shape):

    Returns:
        2D head feature
    """
    dim = cfg.FPN.FRCNN_FC_HEAD_DIM
    init = tf.variance_scaling_initializer()
    hidden = FullyConnected('fc6', feature, dim, kernel_initializer=init, activation=tf.nn.relu)
    hidden = FullyConnected('fc7', hidden, dim, kernel_initializer=init, activation=tf.nn.relu)
    return hidden
Example No. 23
def resnet_backbone(image, num_blocks, group_func, block_func):
    with argscope(Conv2D, nl=tf.identity, use_bias=False,
                  W_init=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        logits = (LinearWrap(image)
                  .Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
                  .MaxPooling('pool0', shape=3, stride=2, padding='SAME')
                  .apply(group_func, 'group0', block_func, 64, num_blocks[0], 1)
                  .apply(group_func, 'group1', block_func, 128, num_blocks[1], 2)
                  .apply(group_func, 'group2', block_func, 256, num_blocks[2], 2)
                  .apply(group_func, 'group3', block_func, 512, num_blocks[3], 2)
                  .GlobalAvgPooling('gap')
                  .FullyConnected('linear', 1000, nl=tf.identity)())
    return logits
Example No. 24
 def get_instance(args):
     """
     Create an instance of the initializer.
     """
     scale = float(args.get('scale', 1.0))
     mode = args.get('mode', "fan_in")
     assert (mode in ["fan_in", "fan_out", "fan_avg"])
     distribution = args.get('distribution', "normal")
     assert (distribution in ["normal", "uniform"])
     return tf.variance_scaling_initializer(scale,
                                            mode,
                                            distribution,
                                            seed=SEED)
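For reference, tf.variance_scaling_initializer keeps the weight variance near scale / n, where n is the fan-in, fan-out, or their average depending on mode: stddev ≈ sqrt(scale / n) for the (truncated) normal variant and limit = sqrt(3 * scale / n) for the uniform one. A quick numeric check of the He-normal case this factory produces with scale=2.0, mode='fan_in':

import math

fan_in = 512                      # e.g. input units of a dense layer
stddev = math.sqrt(2.0 / fan_in)  # He-normal stddev: exactly 0.0625 here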
Example No. 25
def fastrcnn_Xconv1fc_head(feature, num_convs, norm=None):
    """
    Args:
        feature (NCHW):
        num_convs (int): number of conv layers
        norm (str or None): either None or 'GN'

    Returns:
        2D head feature
    """
    assert norm in [None, 'GN'], norm
    l = feature
    with argscope(Conv2D, data_format='channels_first',
                  kernel_initializer=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_out', distribution='normal')):
        for k in range(num_convs):
            l = Conv2D('conv{}'.format(k), l, cfg.FPN.FRCNN_CONV_HEAD_DIM, 3, activation=tf.nn.relu)
            if norm is not None:
                l = GroupNorm('gn{}'.format(k), l)
        l = FullyConnected('fc', l, cfg.FPN.FRCNN_FC_HEAD_DIM,
                           kernel_initializer=tf.variance_scaling_initializer(), activation=tf.nn.relu)
    return l
Example No. 26
def fastrcnn_2fc_head(feature, num_classes):
    """
    Args:
        feature (any shape):
        num_classes(int): num_category + 1

    Returns:
        cls_logits (N x num_classes), reg_logits (N x (num_classes-1) x 4)
    """
    dim = cfg.FPN.FRCNN_FC_HEAD_DIM
    init = tf.variance_scaling_initializer()
    hidden = FullyConnected('fc6', feature, dim, kernel_initializer=init, activation=tf.nn.relu)
    hidden = FullyConnected('fc7', hidden, dim, kernel_initializer=init, activation=tf.nn.relu)
    return fastrcnn_outputs('outputs', hidden, num_classes)
Example No. 27
def resnet_backbone(image, num_blocks, group_func, block_func):
    with argscope(Conv2D, use_bias=False,
                  kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        # Note that this pads the image by [2, 3] instead of [3, 2].
        # Similar things happen in later stride=2 layers as well.
        l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
        l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME')
        l = group_func('group0', l, block_func, 64, num_blocks[0], 1)
        l = group_func('group1', l, block_func, 128, num_blocks[1], 2)
        l = group_func('group2', l, block_func, 256, num_blocks[2], 2)
        l = group_func('group3', l, block_func, 512, num_blocks[3], 2)
        l = GlobalAvgPooling('gap', l)
        logits = FullyConnected('linear', l, 1000,
                                kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    return logits
Example No. 28
def maskrcnn_head(feature, num_class):
    """
    Args:
        feature (NxCx7x7):
        num_class (int): num_category + 1

    Returns:
        mask_logits (N x num_category x 14 x 14):
    """
    with argscope([Conv2D, Deconv2D], data_format='NCHW',
                  W_init=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_in', distribution='normal')):
        l = Deconv2D('deconv', feature, 256, 2, stride=2, nl=tf.nn.relu)
        l = Conv2D('conv', l, num_class - 1, 1)
    return l
Example No. 29
def DepthConv(x, out_channel, kernel_shape, padding='SAME', stride=1,
              W_init=None, activation=tf.identity):
    in_shape = x.get_shape().as_list()
    in_channel = in_shape[1]
    assert out_channel % in_channel == 0, (out_channel, in_channel)
    channel_mult = out_channel // in_channel

    if W_init is None:
        W_init = tf.variance_scaling_initializer(2.0)
    kernel_shape = [kernel_shape, kernel_shape]
    filter_shape = kernel_shape + [in_channel, channel_mult]

    W = tf.get_variable('W', filter_shape, initializer=W_init)
    conv = tf.nn.depthwise_conv2d(x, W, [1, 1, stride, stride], padding=padding, data_format='NCHW')
    return activation(conv, name='output')
Example No. 30
def conv2d_fixed_padding(**kwargs):
  """conv2d with fixed_padding, based only on kernel_size."""
  strides = kwargs["strides"]
  if strides > 1:
    kwargs["inputs"] = fixed_padding(kwargs["inputs"], kwargs["kernel_size"],
                                     kwargs["data_format"])

  defaults = {
      "padding": ("SAME" if strides == 1 else "VALID"),
      "use_bias": False,
      "kernel_initializer": tf.variance_scaling_initializer(),
  }
  defaults.update(kwargs)

  return tf.layers.conv2d(**defaults)
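Because defaults.update(kwargs) runs last, anything the caller passes overrides the defaults. For example, a hypothetical call swapping in a Glorot initializer for an input tensor x while keeping the fixed-padding behavior:

out = conv2d_fixed_padding(
    inputs=x, filters=64, kernel_size=3, strides=2,
    data_format="channels_last",
    kernel_initializer=tf.glorot_uniform_initializer())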
Example No. 31
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt

mnist = input_data.read_data_sets('data_MNIST/', one_hot=True)

input_node = 784
n_nodes_h1 = 621
n_nodes_h2 = 312
n_nodes_h3 = 128
n_nodes_h4 = 312
n_nodes_h5 = 621
n_classes = 784
batch_size = 128

scaler = tf.variance_scaling_initializer()

X = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32)

# noise matrix
mean = 0.9
stddev = 0.7
noise_global = np.random.normal(mean, stddev, 784)


def stacked_an(X):
    hidden_1_layer = {
        'weights':
        tf.Variable(scaler([input_node, n_nodes_h1], dtype=tf.float32)),
        'biases': tf.Variable(scaler([n_nodes_h1], dtype=tf.float32))
Example No. 32
def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format,
                         training, dropout, dropout_prob):
    """Strided 2-D convolution with explicit padding."""
    # The padding is consistent and is based only on `kernel_size`, not on the
    # dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).
    if strides > 1:
        inputs = fixed_padding(inputs, kernel_size, data_format)
    output = None
    if dropout != 'spiral':
        output = tf.layers.conv2d(
            inputs=inputs,
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=('SAME' if strides == 1 else 'VALID'),
            use_bias=False,
            kernel_initializer=tf.variance_scaling_initializer(),
            data_format=data_format)
    elif dropout == 'spiral':
        output = spiral_conv2d(
            inputs=inputs,
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=('SAME' if strides == 1 else 'VALID'),
            use_bias=False,
            kernel_initializer=tf.variance_scaling_initializer(),
            data_format=data_format,
            training=training,
            spiral_prob=dropout_prob)
        output_channel_last = tf.transpose(output, [0, 3, 2, 1])
        output_channel_last = DropBlock(keep_prob=dropout_prob,
                                        block_size=7)(output_channel_last,
                                                      training=training)
        return tf.transpose(output_channel_last, [0, 3, 2, 1],
                            name='dropblock_output')

    #if not training:
    #    dropout_prob = 1
    if dropout == 'dropout':
        print("Normal Dropout")
        output = Dropout(rate=(1 - dropout_prob))(output, training=training)

    elif dropout == 'spatial':
        print("spatial Dropout")
        output = SpatialDropout2D(rate=(1 - dropout_prob),
                                  data_format=data_format)(output,
                                                           training=training)
    elif dropout == 'dropblock3':
        print("DropBlock3")
        output = tf.identity(output, name='conv_output')
        output_channel_last = tf.transpose(output, [0, 3, 2, 1])
        output_channel_last = DropBlock(keep_prob=dropout_prob,
                                        block_size=3)(output_channel_last,
                                                      training=training)
        output = tf.transpose(output_channel_last, [0, 3, 2, 1],
                              name='dropblock_output')
    elif dropout == 'dropblock5':
        print("DropBlock5")
        output = tf.identity(output, name='conv_output')
        output_channel_last = tf.transpose(output, [0, 3, 2, 1])
        output_channel_last = DropBlock(keep_prob=dropout_prob,
                                        block_size=5)(output_channel_last,
                                                      training=training)
        output = tf.transpose(output_channel_last, [0, 3, 2, 1],
                              name='dropblock_output')
    elif dropout == 'dropblock7':
        print("DropBlock7")
        output = tf.identity(output, name='conv_output')
        output_channel_last = tf.transpose(output, [0, 3, 2, 1])
        output_channel_last = DropBlock(keep_prob=dropout_prob,
                                        block_size=7)(output_channel_last,
                                                      training=training)
        output = tf.transpose(output_channel_last, [0, 3, 2, 1],
                              name='dropblock_output')

    return output
Example No. 33
  def __init__(self,
               observation_spec,
               conv_layer_params=None,
               input_fc_layer_params=(75, 40),
               lstm_size=(40,),
               output_fc_layer_params=(75, 40),
               activation_fn=tf.keras.activations.relu,
               name='ValueRnnNetwork'):
    """Creates an instance of `ValueRnnNetwork`.

    Network supports calls with shape outer_rank + observation_spec.shape. Note
    outer_rank must be at least 1.

    Args:
      observation_spec: A nest of `tensor_spec.TensorSpec` representing the
        observations.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      input_fc_layer_params: Optional list of fully_connected parameters, where
        each item is the number of units in the layer. This is applied before
        the LSTM cell.
      lstm_size: An iterable of ints specifying the LSTM cell sizes to use.
      output_fc_layer_params: Optional list of fully_connected parameters, where
        each item is the number of units in the layer. This is applied after the
        LSTM cell.
      activation_fn: Activation function, e.g. tf.keras.activations.relu.
      name: A string representing name of the network.

    Raises:
      ValueError: If `observation_spec` contains more than one observation.
    """
    if len(nest.flatten(observation_spec)) > 1:
      raise ValueError(
          'Network only supports observation_specs with a single observation.')

    input_layers = utils.mlp_layers(
        conv_layer_params,
        input_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=tf.keras.initializers.glorot_uniform(),
        name='input_mlp')

    # Create RNN cell
    if len(lstm_size) == 1:
      cell = tf.keras.layers.LSTMCell(lstm_size[0])
    else:
      cell = tf.keras.layers.StackedRNNCells(
          [tf.keras.layers.LSTMCell(size) for size in lstm_size])

    state_spec = nest.map_structure(
        functools.partial(
            tensor_spec.TensorSpec, dtype=tf.float32,
            name='network_state_spec'), list(cell.state_size))

    output_layers = []
    if output_fc_layer_params:
      output_layers = [
          tf.keras.layers.Dense(
              num_units,
              activation=activation_fn,
              kernel_initializer=tf.variance_scaling_initializer(
                  scale=2.0, mode='fan_in', distribution='truncated_normal'),
              name='output/dense') for num_units in output_fc_layer_params
      ]

    value_projection_layer = keras_layers.Dense(
        1,
        activation=None,
        kernel_initializer=tf.random_uniform_initializer(
            minval=-0.03, maxval=0.03),
    )

    super(ValueRnnNetwork, self).__init__(
        observation_spec=observation_spec,
        action_spec=None,
        state_spec=state_spec,
        name=name)

    self._conv_layer_params = conv_layer_params
    self._input_layers = input_layers
    self._cell = cell
    self._output_layers = output_layers
    self._value_projection_layer = value_projection_layer
Example No. 34
def novel_fc(x, hidden_sizes, training=False, l = (1e-6, 1e-6, 1e-6), p = (0.5, 0.5, 0.5),\
             n_cosmo_params = 7, n_hod_params = 4):

    cosmo_sizes, hod_sizes, cap_sizes = hidden_sizes

    if type(l) is float:
        cosmo_l, hod_l, cap_l = l, l, l
    else:
        cosmo_l, hod_l, cap_l = l

    if type(p) is float:
        cosmo_p, hod_p, cap_p = p, p, p
    else:
        cosmo_p, hod_p, cap_p = p

    initializer = tf.variance_scaling_initializer(scale=2.0)

    # only for duplicating r
    n_params = n_cosmo_params + n_hod_params
    cosmo_x = tf.slice(x, [0, 0], [-1, n_cosmo_params])
    cosmo_x = tf.concat(
        values=[cosmo_x, tf.slice(x, [0, n_params - 1], [-1, -1])], axis=1)
    #print tf.shape(cosmo_x)
    #print tf.shape(tf.slice(x, [0, n_params-1], [-1, -1]))
    hod_x = tf.slice(x, [0, n_cosmo_params], [-1, -1])

    cosmo_regularizer = tf.contrib.layers.l1_regularizer(cosmo_l)
    cosmo_out = cosmo_x

    for size in cosmo_sizes:
        fc_output = tf.layers.dense(cosmo_out, size,
                                 kernel_initializer = initializer,\
                                    kernel_regularizer = cosmo_regularizer)
        bd_out = tf.layers.dropout(fc_output, cosmo_p, training=training)
        bn_out = tf.layers.batch_normalization(bd_out,
                                               axis=-1,
                                               training=training)
        cosmo_out = tf.nn.relu(bn_out)  #tf.nn.leaky_relu(bn_out, alpha=0.01)

    hod_regularizer = tf.contrib.layers.l1_regularizer(hod_l)
    hod_out = hod_x

    for size in hod_sizes:
        fc_output = tf.layers.dense(hod_out, size,
                                 kernel_initializer = initializer,\
                                    kernel_regularizer = hod_regularizer)
        bd_out = tf.layers.dropout(fc_output, hod_p, training=training)
        bn_out = tf.layers.batch_normalization(bd_out,
                                               axis=-1,
                                               training=training)
        hod_out = tf.nn.relu(bn_out)  #tf.nn.leaky_relu(bn_out, alpha=0.01)

    cap_out = tf.concat(values=[cosmo_out, hod_out], axis=1)
    cap_regularizer = tf.contrib.layers.l1_regularizer(cap_l)

    for size in cap_sizes:
        fc_output = tf.layers.dense(cap_out, size,
                                 kernel_initializer = initializer,\
                                    kernel_regularizer = cap_regularizer)
        bd_out = tf.layers.dropout(fc_output, cap_p, training=training)
        bn_out = tf.layers.batch_normalization(bd_out,
                                               axis=-1,
                                               training=training)
        cap_out = tf.nn.relu(bn_out)  #tf.nn.leaky_relu(bn_out, alpha=0.01)

    pred = tf.layers.dense(cap_out,
                           1,
                           kernel_initializer=initializer,
                           kernel_regularizer=cap_regularizer)[:, 0]  #,
    return pred
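The three-way split of hidden_sizes is easy to get wrong; a hypothetical call wiring separate towers for the 7 cosmology and 4 HOD parameters plus the combined "cap" tower:

pred = novel_fc(x, hidden_sizes=([128, 128], [64, 64], [256, 128]),
                training=True, l=(1e-6, 1e-6, 1e-6), p=(0.5, 0.5, 0.5))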
Example No. 35
def conv2d_fixed_padding(inputs,
                         filters,
                         kernel_size,
                         strides,
                         data_format="channels_first",
                         use_td=False,
                         targeting_rate=None,
                         keep_prob=None,
                         is_training=None):
  """Strided 2-D convolution with explicit padding.

  The padding is consistent and is based only on `kernel_size`, not on the
  dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).

  Args:
    inputs: `Tensor` of size `[batch, channels, height_in, width_in]`.
    filters: `int` number of filters in the convolution.
    kernel_size: `int` size of the kernel to be used in the convolution.
    strides: `int` strides of the convolution.
    data_format: `str` either "channels_first" for `[batch, channels, height,
        width]` or "channels_last" for `[batch, height, width, channels]`.
    use_td: `str` one of "weight" or "unit". Set to False or "" to disable
      targeted dropout.
    targeting_rate: `float` proportion of weights to target with targeted
      dropout.
    keep_prob: `float` keep probability for targeted dropout.
    is_training: `bool` for whether the model is in training.

  Returns:
    A `Tensor` of shape `[batch, filters, height_out, width_out]`.

  Raises:
    Exception: if use_td is not valid.
  """
  if strides > 1:
    inputs = fixed_padding(inputs, kernel_size, data_format=data_format)

  if use_td:
    inputs_shape = common_layers.shape_list(inputs)
    if use_td == "weight":
      if data_format == "channels_last":
        size = kernel_size * kernel_size * inputs_shape[-1]
      else:
        size = kernel_size * kernel_size * inputs_shape[1]
      targeting_count = targeting_rate * tf.to_float(size)
      targeting_fn = common_layers.weight_targeting
    elif use_td == "unit":
      targeting_count = targeting_rate * filters
      targeting_fn = common_layers.unit_targeting
    else:
      raise Exception("Unrecognized targeted dropout type: %s" % use_td)

    y = common_layers.td_conv(
        inputs,
        filters,
        kernel_size,
        targeting_count,
        targeting_fn,
        keep_prob,
        is_training,
        do_prune=True,
        strides=strides,
        padding=("SAME" if strides == 1 else "VALID"),
        data_format=data_format,
        use_bias=False,
        kernel_initializer=tf.variance_scaling_initializer())
  else:
    y = tf.layers.conv2d(
        inputs=inputs,
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=("SAME" if strides == 1 else "VALID"),
        use_bias=False,
        kernel_initializer=tf.variance_scaling_initializer(),
        data_format=data_format)

  return y
Example No. 36
batch_size = 500
print('n_epoch = ', n_epoch)
print('batch_size = ', batch_size)
print()

#%% Graph
tf.reset_default_graph()

X = tf.placeholder(tf.float32, shape=(None, n_input), name="X")
Y = tf.placeholder(tf.float32, shape=(None, n_output), name="Y")
training = tf.placeholder_with_default(False, shape=(), name='training')

batch_norm_momentum = 0.9

with tf.name_scope("dnn"):
    he_init = tf.variance_scaling_initializer()

    my_batch_norm_layer = partial(tf.layers.batch_normalization,
                                  training=training,
                                  momentum=batch_norm_momentum)

    my_dense_layer = partial(tf.layers.dense,
                             kernel_initializer=he_init,
                             activation=tf.nn.relu)

    hidden1 = my_dense_layer(X, n_hidden[0], name="hidden1")
    bn1 = tf.nn.relu(my_batch_norm_layer(hidden1))
    hidden2 = my_dense_layer(bn1, n_hidden[1], name="hidden2")
    bn2 = tf.nn.relu(my_batch_norm_layer(hidden2))
    outputs = tf.layers.dense(bn2,
                              n_output,
Example No. 37
    def _build_graph(self, inputs):

        images, truemap_coded = inputs

        orig_imgs = images

        true_np = truemap_coded[..., 0]
        true_np = tf.cast(true_np, tf.int32)
        true_np = tf.identity(true_np, name='truemap-np')
        one_np = tf.one_hot(true_np, 2, axis=-1)
        true_np = tf.expand_dims(true_np, axis=-1)

        true_dist = truemap_coded[..., 1:]
        true_dist = tf.identity(true_dist, name='truemap-dist')

        ####
        with argscope(Conv2D, activation=tf.identity, use_bias=False,  # Kaiming He init
                      W_init=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')), \
                argscope([Conv2D, BatchNorm], data_format=self.data_format):

            i = tf.transpose(images, [0, 3, 1, 2])
            i = i if not self.input_norm else i / 255.0

            ####
            d = encoder(i, self.freeze)
            d[0] = crop_op(d[0], (184, 184))
            d[1] = crop_op(d[1], (72, 72))

            ####
            np_feat = decoder('np', d)
            np = BNReLU('preact_out_np', np_feat[-1])

            dist_feat = decoder('dst', d)
            dist = BNReLU('preact_out_dist', dist_feat[-1])

            ####
            logi_np = Conv2D('conv_out_np',
                             np,
                             2,
                             1,
                             use_bias=True,
                             activation=tf.identity)
            logi_np = tf.transpose(logi_np, [0, 2, 3, 1])
            soft_np = tf.nn.softmax(logi_np, axis=-1)
            prob_np = tf.identity(soft_np[..., 1], name='predmap-prob-np')
            prob_np = tf.expand_dims(prob_np, axis=-1)
            pred_np = tf.argmax(soft_np, axis=-1, name='predmap-np')
            pred_np = tf.expand_dims(tf.cast(pred_np, tf.float32), axis=-1)

            ####
            logi_dist = Conv2D('conv_out_dist',
                               dist,
                               1,
                               1,
                               use_bias=True,
                               activation=tf.identity)
            logi_dist = tf.transpose(logi_dist, [0, 2, 3, 1])
            prob_dist = tf.identity(logi_dist, name='predmap-prob-dist')
            pred_dist = tf.identity(logi_dist, name='predmap-dist')

            # encoded so that inference can extract all output at once
            predmap_coded = tf.concat([prob_np, pred_dist],
                                      axis=-1,
                                      name='predmap-coded')
        ####

        ####
        if get_current_tower_context().is_training:
            ######## LOSS
            ### Distance regression loss
            loss_mse = pred_dist - true_dist
            loss_mse = loss_mse * loss_mse
            loss_mse = tf.reduce_mean(loss_mse, name='loss-mse')
            add_moving_summary(loss_mse)

            ### Nuclei Blob classification loss
            loss_bce = categorical_crossentropy(soft_np, one_np)
            loss_bce = tf.reduce_mean(loss_bce, name='loss-bce')
            add_moving_summary(loss_bce)

            ### combine the loss into single cost function
            self.cost = tf.identity(loss_mse + loss_bce, name='overall-loss')
            add_moving_summary(self.cost)
            ####

            add_param_summary(('.*/W', ['histogram']))  # monitor W

            #### log visual summaries
            orig_imgs = tf.cast(orig_imgs, tf.uint8)
            tf.summary.image('input', orig_imgs, max_outputs=1)

            orig_imgs = crop_op(orig_imgs, (190, 190), "NHWC")

            pred_np = colorize(prob_np[..., 0], cmap='jet')
            true_np = colorize(true_np[..., 0], cmap='jet')

            pred_dist = colorize(prob_dist[..., 0], cmap='jet')
            true_dist = colorize(true_dist[..., 0], cmap='jet')

            viz = tf.concat([
                orig_imgs,
                true_np,
                pred_np,
                true_dist,
                pred_dist,
            ], 2)

            tf.summary.image('output', viz, max_outputs=1)

        return
Example No. 38
    def _build_graph(self, inputs):

        images, truemap_coded = inputs
        orig_imgs = images

        if hasattr(self, 'type_classification') and self.type_classification:
            true_type = truemap_coded[..., 1]
            true_type = tf.cast(true_type, tf.int32)
            true_type = tf.identity(true_type, name='truemap-type')
            one_type = tf.one_hot(true_type, self.nr_types, axis=-1)
            true_type = tf.expand_dims(true_type, axis=-1)

            true_np = tf.cast(true_type > 0, tf.int32)  # TODO: sanity-check this
            true_np = tf.identity(true_np, name='truemap-np')
            one_np = tf.one_hot(tf.squeeze(true_np), 2, axis=-1)
        else:
            true_np = truemap_coded[..., 0]
            true_np = tf.cast(true_np, tf.int32)
            true_np = tf.identity(true_np, name='truemap-np')
            one_np = tf.one_hot(true_np, 2, axis=-1)
            true_np = tf.expand_dims(true_np, axis=-1)

        true_hv = truemap_coded[..., -2:]
        true_hv = tf.identity(true_hv, name='truemap-hv')

        ####
        with argscope(Conv2D, activation=tf.identity, use_bias=False,  # Kaiming He init
                      W_init=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')), \
                argscope([Conv2D, BatchNorm], data_format=self.data_format):

            i = tf.transpose(images, [0, 3, 1, 2])
            i = i if not self.input_norm else i / 255.0

            ####
            d = encoder(i, self.freeze)
            d[0] = crop_op(d[0], (184, 184))
            d[1] = crop_op(d[1], (72, 72))

            ####
            np_feat = decoder('np', d)
            npx = BNReLU('preact_out_np', np_feat[-1])

            hv_feat = decoder('hv', d)
            hv = BNReLU('preact_out_hv', hv_feat[-1])

            if self.type_classification:
                tp_feat = decoder('tp', d)
                tp = BNReLU('preact_out_tp', tp_feat[-1])

                # Nuclei Type Pixels (TP)
                logi_class = Conv2D('conv_out_tp',
                                    tp,
                                    self.nr_types,
                                    1,
                                    use_bias=True,
                                    activation=tf.identity)
                logi_class = tf.transpose(logi_class, [0, 2, 3, 1])
                soft_class = tf.nn.softmax(logi_class, axis=-1)

            #### Nuclei Pixels (NP)
            logi_np = Conv2D('conv_out_np',
                             npx,
                             2,
                             1,
                             use_bias=True,
                             activation=tf.identity)
            logi_np = tf.transpose(logi_np, [0, 2, 3, 1])
            soft_np = tf.nn.softmax(logi_np, axis=-1)
            prob_np = tf.identity(soft_np[..., 1], name='predmap-prob-np')
            prob_np = tf.expand_dims(prob_np, axis=-1)

            #### Horizontal-Vertical (HV)
            logi_hv = Conv2D('conv_out_hv',
                             hv,
                             2,
                             1,
                             use_bias=True,
                             activation=tf.identity)
            logi_hv = tf.transpose(logi_hv, [0, 2, 3, 1])
            prob_hv = tf.identity(logi_hv, name='predmap-prob-hv')
            pred_hv = tf.identity(logi_hv, name='predmap-hv')

            # * channel ordering: type-map, segmentation map
            # encoded so that inference can extract all output at once
            if self.type_classification:
                predmap_coded = tf.concat([soft_class, prob_np, pred_hv],
                                          axis=-1,
                                          name='predmap-coded')
            else:
                predmap_coded = tf.concat([prob_np, pred_hv],
                                          axis=-1,
                                          name='predmap-coded')
        ####
        def get_gradient_hv(l, h_ch, v_ch):
            """
            Calculate the horizontal partial derivative for the horizontal channel
            and the vertical partial derivative for the vertical channel.
            The partial derivative is approximated by the central difference,
            which is obtained with a Sobel kernel of size 5x5. The boundary is zero-padded
            when the channel is convolved with the Sobel kernel.
            Args:
                l (tensor): tensor of shape NHWC where C must be 2 (1 channel for horizontal
                            and 1 channel for vertical)
                h_ch(int) : index within C axis of `l` that corresponds to horizontal channel
                v_ch(int) : index within C axis of `l` that corresponds to vertical channel
            """
            def get_sobel_kernel(size):
                assert size % 2 == 1, 'Must be odd, got size=%d' % size

                h_range = np.arange(-size // 2 + 1,
                                    size // 2 + 1,
                                    dtype=np.float32)
                v_range = np.arange(-size // 2 + 1,
                                    size // 2 + 1,
                                    dtype=np.float32)
                h, v = np.meshgrid(h_range, v_range)
                kernel_h = h / (h * h + v * v + 1.0e-15)
                kernel_v = v / (h * h + v * v + 1.0e-15)
                return kernel_h, kernel_v

            mh, mv = get_sobel_kernel(5)
            mh = tf.constant(mh, dtype=tf.float32)
            mv = tf.constant(mv, dtype=tf.float32)

            mh = tf.reshape(mh, [5, 5, 1, 1])
            mv = tf.reshape(mv, [5, 5, 1, 1])

            # central difference to get gradient, ignore the boundary problem
            h = tf.expand_dims(l[..., h_ch], axis=-1)
            v = tf.expand_dims(l[..., v_ch], axis=-1)
            dh = tf.nn.conv2d(h, mh, strides=[1, 1, 1, 1], padding='SAME')
            dv = tf.nn.conv2d(v, mv, strides=[1, 1, 1, 1], padding='SAME')
            output = tf.concat([dh, dv], axis=-1)
            return output

        def loss_mse(true, pred, name=None):
            ### regression loss
            loss = pred - true
            loss = tf.reduce_mean(loss * loss, name=name)
            return loss

        def loss_msge(true, pred, focus, name=None):
            focus = tf.stack([focus, focus], axis=-1)
            pred_grad = get_gradient_hv(pred, 1, 0)
            true_grad = get_gradient_hv(true, 1, 0)
            loss = pred_grad - true_grad
            loss = focus * (loss * loss)
            # artificial reduce_mean with focus region
            loss = tf.reduce_sum(loss) / (tf.reduce_sum(focus) + 1.0e-8)
            loss = tf.identity(loss, name=name)
            return loss

        ####
        if get_current_tower_context().is_training:
            #---- LOSS ----#
            loss = 0
            for term, weight in self.loss_term.items():
                if term == 'mse':
                    term_loss = loss_mse(true_hv, pred_hv, name='loss-mse')
                elif term == 'msge':
                    focus = truemap_coded[..., 0]
                    term_loss = loss_msge(true_hv,
                                          pred_hv,
                                          focus,
                                          name='loss-msge')
                elif term == 'bce':
                    term_loss = categorical_crossentropy(soft_np, one_np)
                    term_loss = tf.reduce_mean(term_loss, name='loss-bce')
                elif term == 'dice':
                    term_loss = dice_loss(soft_np[...,0], one_np[...,0]) \
                              + dice_loss(soft_np[...,1], one_np[...,1])
                    term_loss = tf.identity(term_loss, name='loss-dice')
                else:
                    assert False, 'Unsupported loss term: %s' % term
                add_moving_summary(term_loss)
                loss += term_loss * weight

            if self.type_classification:
                term_loss = categorical_crossentropy(soft_class, one_type)
                term_loss = tf.reduce_mean(term_loss,
                                           name='loss-xentropy-class')
                add_moving_summary(term_loss)
                loss = loss + term_loss

                # term_loss = dice_loss(soft_class[...,0], one_type[...,0]) \
                #           + dice_loss(soft_class[...,1], one_type[...,1]) \
                #           + dice_loss(soft_class[...,2], one_type[...,2]) \
                #           + dice_loss(soft_class[...,3], one_type[...,3]) \
                #           + dice_loss(soft_class[...,4], one_type[...,4])

                term_loss = 0
                for type_id in range(self.nr_types):
                    term_loss += dice_loss(soft_class[..., type_id],
                                           one_type[..., type_id])

                term_loss = tf.identity(term_loss, name='loss-dice-class')
                add_moving_summary(term_loss)
                loss = loss + term_loss

            ### combine the losses into a single cost function
            self.cost = tf.identity(loss, name='overall-loss')
            add_moving_summary(self.cost)
            ####

            add_param_summary(('.*/W', ['histogram']))  # monitor W

            ### log visualizations
            orig_imgs = tf.cast(orig_imgs, tf.uint8)
            tf.summary.image('input', orig_imgs, max_outputs=1)

            orig_imgs = crop_op(orig_imgs, (190, 190), "NHWC")

            pred_np = colorize(prob_np[..., 0], cmap='jet')
            true_np = colorize(true_np[..., 0], cmap='jet')

            pred_h = colorize(prob_hv[..., 0], vmin=-1, vmax=1, cmap='jet')
            pred_v = colorize(prob_hv[..., 1], vmin=-1, vmax=1, cmap='jet')
            true_h = colorize(true_hv[..., 0], vmin=-1, vmax=1, cmap='jet')
            true_v = colorize(true_hv[..., 1], vmin=-1, vmax=1, cmap='jet')

            if not self.type_classification:
                viz = tf.concat([
                    orig_imgs, pred_h, pred_v, pred_np, true_h, true_v, true_np
                ], 2)
            else:
                pred_type = tf.transpose(soft_class, (0, 1, 3, 2))
                pred_type = tf.reshape(pred_type, [-1, 80, 80 * self.nr_types])
                true_type = tf.cast(true_type[..., 0] / self.nr_classes,
                                    tf.float32)
                true_type = colorize(true_type, vmin=0, vmax=1, cmap='jet')
                pred_type = colorize(pred_type, vmin=0, vmax=1, cmap='jet')

                viz = tf.concat([
                    orig_imgs,
                    pred_h,
                    pred_v,
                    pred_np,
                    pred_type,
                    true_h,
                    true_v,
                    true_np,
                    true_type,
                ], 2)

            viz = tf.concat([viz[0], viz[-1]], axis=0)
            viz = tf.expand_dims(viz, axis=0)
            tf.summary.image('output', viz, max_outputs=1)

        return
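
The h / (h*h + v*v) construction in get_sobel_kernel above is worth a sanity check. A standalone NumPy sketch (not part of the original model) confirming that the horizontal kernel is antisymmetric left-to-right and that the vertical kernel is its transpose:

import numpy as np

def get_sobel_kernel(size):
    # same construction as in get_gradient_hv above
    rng = np.arange(-(size // 2), size // 2 + 1, dtype=np.float32)
    h, v = np.meshgrid(rng, rng)
    kernel_h = h / (h * h + v * v + 1.0e-15)
    kernel_v = v / (h * h + v * v + 1.0e-15)
    return kernel_h, kernel_v

kh, kv = get_sobel_kernel(5)
assert np.allclose(kh, -kh[:, ::-1])  # flipping columns negates the h-gradient
assert np.allclose(kh, kv.T)          # v-kernel is the transpose of the h-kernel
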
Esempio n. 39
    def build_graph(self, A, B):
        A = tf.transpose(A / 255.0, [0, 3, 1, 2])
        B = tf.transpose(B / 255.0, [0, 3, 1, 2])

        # use the torch initializers
        with argscope([Conv2D, Conv2DTranspose, FullyConnected],
                      kernel_initializer=tf.variance_scaling_initializer(scale=0.333, distribution='uniform'),
                      use_bias=False), \
                argscope(BatchNorm, gamma_init=tf.random_uniform_initializer()), \
                argscope([Conv2D, Conv2DTranspose, BatchNorm], data_format='NCHW'):
            with tf.variable_scope('gen'):
                with tf.variable_scope('B'):
                    AB = self.generator(A)
                with tf.variable_scope('A'):
                    BA = self.generator(B)
                    ABA = self.generator(AB)
                with tf.variable_scope('B'):
                    BAB = self.generator(BA)

            viz_A_recon = tf.concat([A, AB, ABA], axis=3, name='viz_A_recon')
            viz_B_recon = tf.concat([B, BA, BAB], axis=3, name='viz_B_recon')
            tf.summary.image('Arecon',
                             tf.transpose(viz_A_recon, [0, 2, 3, 1]),
                             max_outputs=50)
            tf.summary.image('Brecon',
                             tf.transpose(viz_B_recon, [0, 2, 3, 1]),
                             max_outputs=50)

            with tf.variable_scope('discrim'):
                with tf.variable_scope('A'):
                    A_dis_real, A_feats_real = self.discriminator(A)
                    A_dis_fake, A_feats_fake = self.discriminator(BA)

                with tf.variable_scope('B'):
                    B_dis_real, B_feats_real = self.discriminator(B)
                    B_dis_fake, B_feats_fake = self.discriminator(AB)

        with tf.name_scope('LossA'):
            # reconstruction loss
            recon_loss_A = tf.reduce_mean(tf.squared_difference(A, ABA),
                                          name='recon_loss')
            # gan loss
            self.build_losses(A_dis_real, A_dis_fake)
            G_loss_A = self.g_loss
            D_loss_A = self.d_loss
            # feature matching loss
            fm_loss_A = self.get_feature_match_loss(A_feats_real, A_feats_fake)

        with tf.name_scope('LossB'):
            recon_loss_B = tf.reduce_mean(tf.squared_difference(B, BAB),
                                          name='recon_loss')
            self.build_losses(B_dis_real, B_dis_fake)
            G_loss_B = self.g_loss
            D_loss_B = self.d_loss
            fm_loss_B = self.get_feature_match_loss(B_feats_real, B_feats_fake)

        global_step = get_global_step_var()
        rate = tf.train.piecewise_constant(global_step, [np.int64(10000)],
                                           [0.01, 0.5])
        rate = tf.identity(rate, name='rate')  # TF issue#8594
        g_loss = tf.add_n([((G_loss_A + G_loss_B) * 0.1 +
                            (fm_loss_A + fm_loss_B) * 0.9) * (1 - rate),
                           (recon_loss_A + recon_loss_B) * rate],
                          name='G_loss_total')
        d_loss = tf.add_n([D_loss_A, D_loss_B], name='D_loss_total')

        self.collect_variables('gen', 'discrim')
        # weight decay
        wd_g = regularize_cost('gen/.*/W',
                               l2_regularizer(1e-5),
                               name='G_regularize')
        wd_d = regularize_cost('discrim/.*/W',
                               l2_regularizer(1e-5),
                               name='D_regularize')

        self.g_loss = g_loss + wd_g
        self.d_loss = d_loss + wd_d

        add_moving_summary(recon_loss_A, recon_loss_B, rate, g_loss, d_loss,
                           wd_g, wd_d)
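
The `rate` schedule above implements a hand-off between loss terms: tf.train.piecewise_constant returns 0.01 while global_step <= 10000 and 0.5 afterwards, so early training is dominated by the adversarial and feature-matching terms and the reconstruction loss gains weight later. A minimal sketch (TF 1.x, standalone session) of the schedule in isolation:

import numpy as np
import tensorflow as tf

step = tf.placeholder(tf.int64, shape=[])
rate = tf.train.piecewise_constant(step, [np.int64(10000)], [0.01, 0.5])
with tf.Session() as sess:
    print(sess.run(rate, {step: 100}))     # 0.01 -> reconstruction nearly off
    print(sess.run(rate, {step: 20000}))   # 0.5  -> reconstruction at half weight
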
Esempio n. 40
def net(observations, config):
    # observation space = shape=(batch_size, episode_length, 10, 14, 2)
    # action space = shape=(batch, episode_length, 23)
    batch_size = tf.shape(observations)[0]
    episode_len = tf.shape(observations)[1]

    input_ = tf.reshape(
        observations,
        shape=[batch_size, episode_len, observations.shape.as_list()[2],
               functools.reduce(operator.mul, observations.shape.as_list()[3:], 1)])
    init_xavier_weights = tf.variance_scaling_initializer(
        scale=1.0, mode='fan_avg', distribution='uniform')
    init_output_weights = tf.variance_scaling_initializer(
        scale=config.init_output_factor, mode='fan_in', distribution='normal')
    # separate value and policy trunks
    with tf.variable_scope('o_trunk_policy'):
        conv1 = tf.layers.conv2d(
            inputs=input_,
            filters=128,
            kernel_size=[1, 3],
            padding='valid',
            activation=tf.nn.relu,
            kernel_initializer=init_xavier_weights
        )
        conv2 = tf.layers.conv2d(
            inputs=conv1,
            filters=128,
            kernel_size=[1, 3],
            padding='valid',
            activation=tf.nn.relu,
            kernel_initializer=init_xavier_weights,
        )
        flatten = tf.reshape(conv2, shape=[batch_size, episode_len, functools.reduce(
            operator.mul, conv2.shape.as_list()[2:], 1)])
        trunk_fc = tf.layers.dense(
            inputs=flatten,
            units=256,
            activation=tf.nn.relu,
            kernel_initializer=init_xavier_weights,
        )
        with tf.variable_scope('o_crown'):
            # offensive
            off_fc = tf.layers.dense(
                inputs=trunk_fc,
                units=128,
                activation=tf.nn.relu,
                kernel_initializer=init_xavier_weights,
            )
            with tf.variable_scope('actions'):
                off_action_mean = tf.layers.dense(
                    inputs=off_fc,
                    units=12,
                    activation=tf.tanh,  # NOTE: tanh may not be the best choice
                    kernel_initializer=init_output_weights,
                )
            with tf.variable_scope('decision'):
                logits = tf.layers.dense(
                    inputs=off_fc,
                    units=3,
                    activation=None,
                    kernel_initializer=init_output_weights,
                )

    with tf.variable_scope('o_trunk_value'):
        conv1 = tf.layers.conv2d(
            inputs=input_,
            filters=128,
            kernel_size=[1, 3],
            padding='valid',
            activation=tf.nn.relu,
            kernel_initializer=init_xavier_weights
        )
        conv2 = tf.layers.conv2d(
            inputs=conv1,
            filters=128,
            kernel_size=[1, 3],
            padding='valid',
            activation=tf.nn.relu,
            kernel_initializer=init_xavier_weights,
        )
        flatten = tf.reshape(conv2, shape=[batch_size, episode_len, functools.reduce(
            operator.mul, conv2.shape.as_list()[2:], 1)])
        trunk_fc = tf.layers.dense(
            inputs=flatten,
            units=256,
            activation=tf.nn.relu,
            kernel_initializer=init_xavier_weights,
        )
        with tf.variable_scope('o_crown'):
            # offensive
            off_fc = tf.layers.dense(
                inputs=trunk_fc,
                units=128,
                activation=tf.nn.relu,
                kernel_initializer=init_xavier_weights,
            )
            off_value = tf.layers.dense(
                inputs=off_fc,
                units=1,
                activation=None,
                kernel_initializer=init_output_weights,
            )
            off_value = tf.reshape(
                off_value, shape=[batch_size, episode_len])
            off_value = tf.check_numerics(off_value, 'off_value')

    # with tf.variable_scope('o_trunk'):
    #     conv1 = tf.layers.conv2d(
    #         inputs=input_,
    #         filters=64,
    #         kernel_size=[1, 3],
    #         padding='same',
    #         activation=tf.nn.relu,
    #         kernel_initializer=init_xavier_weights
    #     )
    #     conv2 = tf.layers.conv2d(
    #         inputs=conv1,
    #         filters=64,
    #         kernel_size=[1, 3],
    #         padding='same',
    #         activation=tf.nn.relu,
    #         kernel_initializer=init_xavier_weights,
    #     )
    #     flatten = tf.reshape(conv2, shape=[batch_size, episode_len, functools.reduce(
    #         operator.mul, conv2.shape.as_list()[2:], 1)])
    #     trunk_fc = tf.layers.dense(
    #         inputs=flatten,
    #         units=128,
    #         activation=tf.nn.relu,
    #         kernel_initializer=init_xavier_weights,
    #     )
    #     with tf.variable_scope('o_crown'):
    #         # offensive
    #         off_fc = tf.layers.dense(
    #             inputs=trunk_fc,
    #             units=64,
    #             activation=tf.nn.relu,
    #             kernel_initializer=init_xavier_weights,
    #         )
    #         with tf.variable_scope('policy'):
    #             with tf.variable_scope('actions'):
    #                 off_action_mean = tf.layers.dense(
    #                     inputs=off_fc,
    #                     units=12,
    #                     activation=tf.tanh,  # NOTE tanh is not good?
    #                     kernel_initializer=init_output_weights,
    #                 )
    #             with tf.variable_scope('decision'):
    #                 logits = tf.layers.dense(
    #                     inputs=off_fc,
    #                     units=3,
    #                     activation=None,
    #                     kernel_initializer=init_output_weights,
    #                 )
    #         with tf.variable_scope('value'):
    #             off_value = tf.layers.dense(
    #                 inputs=off_fc,
    #                 units=1,
    #                 activation=None,
    #                 kernel_initializer=init_output_weights,
    #             )
    #             off_value = tf.reshape(
    #                 off_value, shape=[batch_size, episode_len])
    #             off_value = tf.check_numerics(off_value, 'off_value')

    with tf.variable_scope('d_trunk'):
        conv1 = tf.layers.conv2d(
            inputs=input_,
            filters=64,
            kernel_size=[1, 3],
            padding='same',
            activation=tf.nn.relu,
            kernel_initializer=init_xavier_weights,
        )
        conv2 = tf.layers.conv2d(
            inputs=conv1,
            filters=64,
            kernel_size=[1, 3],
            padding='same',
            activation=tf.nn.relu,
            kernel_initializer=init_xavier_weights,
        )
        flatten = tf.reshape(conv2, shape=[batch_size, episode_len, functools.reduce(
            operator.mul, conv2.shape.as_list()[2:], 1)])
        trunk_fc = tf.layers.dense(
            inputs=flatten,
            units=128,
            activation=tf.nn.relu,
            kernel_initializer=init_xavier_weights,
        )
        with tf.variable_scope('d_crown'):
            # defensive
            def_fc = tf.layers.dense(
                inputs=trunk_fc,
                units=64,
                activation=tf.nn.relu,
                kernel_initializer=init_xavier_weights,
            )
            with tf.variable_scope('policy'):
                with tf.variable_scope('actions'):
                    def_action_mean = tf.layers.dense(
                        inputs=def_fc,
                        units=10,
                        activation=tf.tanh,  # NOTE: tanh may not be the best choice
                        kernel_initializer=init_output_weights,
                    )
            with tf.variable_scope('value'):
                def_value = tf.layers.dense(
                    inputs=def_fc,
                    units=1,
                    activation=None,
                    kernel_initializer=init_output_weights,
                )
                def_value = tf.reshape(
                    def_value, shape=[batch_size, episode_len])
                def_value = tf.check_numerics(def_value, 'def_value')

    return logits, off_action_mean, off_value, def_action_mean, def_value
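
The two initializers above map onto familiar schemes: scale=1.0 with mode='fan_avg' and a uniform distribution is exactly Glorot/Xavier uniform, while the output layers use normal fan-in scaling with a configurable gain. A short equivalence note (TF 1.x):

import tensorflow as tf

init_xavier_weights = tf.variance_scaling_initializer(
    scale=1.0, mode='fan_avg', distribution='uniform')
# equivalent to:
glorot = tf.glorot_uniform_initializer()
# both draw from U(-limit, limit) with limit = sqrt(6 / (fan_in + fan_out))
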
Esempio n. 41
 def _initializer(self, params):
     return tf.variance_scaling_initializer(mode="fan_avg",
                                            distribution="uniform",
                                            dtype=self.dtype)
Esempio n. 42
def train_my_lstm():
    # Data
    train_size = 10000
    test_size = 100
    batch_size = 100
    time_steps = 5
    num_features = 1

    # problem_type = 'regression'
    # X_train, y_train = sin_data(train_size, time_steps=time_steps)
    # X_test, y_test = sin_data(test_size, time_steps=time_steps)

    problem_type = 'binary_classification'
    X_train, y_train = binary_data(train_size, time_steps=time_steps)
    X_test, y_test = binary_data(test_size, time_steps=time_steps)

    # Placeholders. Do NOT use None for batch_size
    inputs = tf.placeholder(tf.float32,
                            shape=[batch_size, time_steps, num_features])
    truth = tf.placeholder(tf.float32, shape=[batch_size, 1])

    initializer = tf.variance_scaling_initializer(scale=2.0)

    # Network structure: 3D LSTM - Dense(1)
    X = my_3d_lstm(inputs)
    X = tf.layers.flatten(X)
    outputs = tf.layers.dense(X, 1, kernel_initializer=initializer)

    if problem_type == 'binary_classification':
        loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=truth,
                                                       logits=outputs)
        loss = tf.reduce_mean(loss)
    elif problem_type == 'regression':
        loss = tf.nn.l2_loss(truth - outputs) / batch_size

    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    optimizer = optimizer.minimize(loss)

    # Initialize and run the graph
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    for epoch_index in range(20):
        for batch_index in range(train_size // batch_size):
            X_train_batch = X_train[batch_index *
                                    batch_size:(batch_index + 1) * batch_size]
            y_train_batch = y_train[batch_index *
                                    batch_size:(batch_index + 1) * batch_size]

            results = sess.run(optimizer,
                               feed_dict={
                                   inputs: X_train_batch,
                                   truth: y_train_batch
                               })
            if (batch_index % 10 == 0):
                results_train_batch = sess.run([outputs, loss],
                                               feed_dict={
                                                   inputs: X_train_batch,
                                                   truth: y_train_batch
                                               })
                results_test = sess.run([outputs, loss],
                                        feed_dict={
                                            inputs: X_test,
                                            truth: y_test
                                        })
                print('------------------------------')
                print('Epoch %d. Batch %d.' % (epoch_index, batch_index))
                print('Train accuracy: %f. Loss: %f.' %
                      (np.mean((results_train_batch[0] > 0) == y_train_batch),
                       results_train_batch[1]))
                # print('Train accuracy: %f. Loss: %f.' % (np.mean(np.abs(results_train_batch[0]-y_train_batch)), results_train_batch[1]))
                print('Test accuracy: %f. Loss: %f.' % (np.mean(
                    (results_test[0] > 0) == y_test), results_test[1]))
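
`binary_data` (like `sin_data` and `my_3d_lstm`) is not shown in this snippet. A hypothetical stand-in, only so the training loop above can be exercised, that labels a sequence 1 when its values sum to a positive number:

import numpy as np

def binary_data(n, time_steps=5):
    # hypothetical helper: random sequences, label = 1 if the sequence sum > 0
    X = np.random.randn(n, time_steps, 1).astype(np.float32)
    y = (X.sum(axis=(1, 2)) > 0).astype(np.float32).reshape(-1, 1)
    return X, y
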
Esempio n. 43
    def __build(self):
        with self._graph.as_default():
            with tf.variable_scope("inputs"):
                # input arguments
                self._features = tf.placeholder(tf.float32, shape=[None, self.t_bins, self.f_bins, 2*self.n_mic])
                self._targets  = tf.placeholder(tf.float32, shape=[None, self.t_bins, self.f_bins, 2*self.n_src])
                global_step    = tf.Variable(1, trainable=False)
                
                # preprocessing
                feature_magnitude = self._features[..., :self.n_mic]
                target_magnitude  = self._targets[..., :self.n_src]
                feature_phase     = self._features[..., self.n_mic:]
                target_phase      = self._targets[..., self.n_src:]
                phase_difference  = target_phase - self._features[..., self.n_mic:self.n_mic+1]
                target_real       = target_magnitude * tf.cos(phase_difference)
                target_image      = target_magnitude * tf.sin(phase_difference)
            
            with tf.variable_scope("cnn", initializer=tf.keras.initializers.Orthogonal(gain=1.0),
                                          regularizer=tf.contrib.layers.l2_regularizer(scale=1e-6)):
                conv = tf.layers.conv2d(feature_magnitude, 64, (3, 3), (1, 1), 
                                        "same", activation=tf.nn.relu)
                conv = tf.layers.max_pooling2d(conv, [1, 4], [1, 4], "valid")
                conv = tf.layers.conv2d(conv, 64, (3, 3), (1, 1), 
                                        "same", activation=tf.nn.relu)
                conv = tf.layers.max_pooling2d(conv, [1, 2], [1, 2], "valid")
                conv = tf.layers.conv2d(conv, 64, (3, 3), (1, 1), 
                                        "same", activation=tf.nn.relu)
                conv = tf.layers.max_pooling2d(conv, [1, 2], [1, 2], "valid")
                conv = tf.reshape(conv, (-1, self.t_bins, 64*32))
                conv = tf.unstack(conv, axis=1)
            
            with tf.variable_scope("rnn", initializer=tf.variance_scaling_initializer(), 
                                          regularizer=tf.contrib.layers.l2_regularizer(scale=1e-6)):
                # cells formation
                cells_forward  = []
                cells_backward = []
                for i in range(3):
                    cell = tf.nn.rnn_cell.GRUCell(num_units=1024)
                    cells_forward.append(cell)
                    cell = tf.nn.rnn_cell.GRUCell(num_units=1024)
                    cells_backward.append(cell)

                # rnn formation
                rnn_forward  = tf.nn.rnn_cell.MultiRNNCell(cells=cells_forward)
                rnn_backward = tf.nn.rnn_cell.MultiRNNCell(cells=cells_backward)
                rnn, _, _ = tf.nn.static_bidirectional_rnn(rnn_forward, rnn_backward, conv, dtype=tf.float32)
                rnn = tf.stack(rnn, axis=1)
                rnn = tf.reshape(rnn, [-1, self.t_bins, 2048])
            
            with tf.variable_scope("fnn", initializer=tf.variance_scaling_initializer(), 
                                          regularizer=tf.contrib.layers.l2_regularizer(scale=1e-6)):
                fnn = tf.layers.dense(rnn, units=self.f_bins*self.n_src)
                fnn = tf.nn.relu(fnn)
            
            with tf.variable_scope("mask", initializer=tf.keras.initializers.Orthogonal(gain=1.0),
                                           regularizer=tf.contrib.layers.l2_regularizer(1e-6)):
                # mask for real part
                mask_real = tf.layers.dense(fnn, units=self.f_bins*self.n_src)
                mask_real = tf.reshape(mask_real, [-1, self.t_bins, self.f_bins, self.n_src])
                # mask_rv = 1 - tf.reduce_sum(mask_re, axis=-1, keepdims=True)
                # self._mask_re = tf.concat([mask_re, mask_rv], axis=-1)

                # mask for imag part
                mask_image = tf.layers.dense(fnn, units=self.f_bins*self.n_src)
                mask_image = tf.reshape(mask_image, [-1, self.t_bins, self.f_bins, self.n_src])
                # mask_iv = 1 - tf.reduce_sum(mask_im, axis=-1, keepdims=True)
                # self._mask_im = tf.concat([mask_im, mask_iv], axis=-1)

            with tf.variable_scope("outputs"):
                # logits layer
                logits_real  = mask_real  * feature_magnitude[..., :1]
                logits_image = mask_image * feature_magnitude[..., :1]
                self._logits = tf.concat((logits_real, logits_image), axis=-1)
                # logit_re     = tf.concat((logits_real, mask_rv * ftr_mgt[..., :1]), axis=-1)
                # logit_im     = tf.concat((logit_im, mask_iv * ftr_mgt[..., :1]), axis=-1)
                
                # regression: MSE & L2-regularization & permutational loss
                self._loss = DTLoss(target_real, logits_real, self.n_src) + \
                             DTLoss(target_image, logits_image, self.n_src) + \
                             tf.losses.get_regularization_loss()
                
                # backward
                '''
                lr = tf.train.exponential_decay(
                    learning_rate=1e-3,
                    global_step=global_step,
                    decay_steps=self.decay_step,
                    decay_rate=0.1,
                    staircase=True
                )
                '''
                lr = 1e-3
                optimizer     = tf.train.AdamOptimizer(lr)
                self.minimize = optimizer.minimize(loss=self._loss)

                # operation
                self._session = tf.Session()
                self._session.run(tf.global_variables_initializer())
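
The cnn and mask scopes above use orthogonal initialization, which returns matrices with orthonormal columns and so preserves activation norms on the first forward pass. A quick NumPy sketch of how such a matrix is typically built (QR decomposition of a Gaussian matrix) and checked:

import numpy as np

w, _ = np.linalg.qr(np.random.randn(64, 64))        # orthogonal factor of a Gaussian matrix
assert np.allclose(w.T @ w, np.eye(64), atol=1e-6)  # columns are orthonormal
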
Esempio n. 44
    def cnn(self):
        with tf.name_scope('embedding_layer'):
            embeddings = self.random_embedding(self.config.vocab_size,
                                               self.config.embedding_dim)
            embedding = tf.Variable(embeddings,
                                    dtype=tf.float32,
                                    trainable=True,
                                    name='word_embedding')
            embeddings_inputs = tf.nn.embedding_lookup(embedding, self.content)
            # embeddings_inputs[batch_size, sequence_length, embedding_dim]
            # the input must be expanded to a 4-D tensor whose last dimension is the depth
            # embeddings_inputs_expanded[batch_size, sequence_length, embedding_dim, input_depth]
            self.embeddings_inputs_expanded = tf.expand_dims(
                embeddings_inputs, -1)

        with tf.name_scope('cnn_layer'):
            # shape = [kernel_height, kernel_width, input_depth, output_depth]
            filter_weights = tf.get_variable(
                name='weights',
                shape=[
                    self.config.kernel_size, self.config.embedding_dim, 1,
                    self.config.num_filters
                ],
                initializer=tf.variance_scaling_initializer(),
                dtype=tf.float32)
            biases = tf.get_variable(name='biases',
                                     shape=[self.config.num_filters],
                                     initializer=tf.zeros_initializer(),
                                     dtype=tf.float32)
            '''
            strides=[batch_stride, height_stride, width_stride, depth_stride]; the first and
            fourth entries must be 1, since the convolution stride only applies to the height
            and width of the input.
            padding  VALID: no padding;  SAME: zero padding.
            The padding scheme determines the output size:
            with zero padding  output_height=ceil(input_height/stride_height)  output_width=ceil(input_width/stride_width)
            no padding         output_height=ceil((input_height-filter_height+1)/stride_height)  output_width=ceil((input_width-filter_width+1)/stride_width)
            '''
            conv = tf.nn.conv2d(self.embeddings_inputs_expanded,
                                filter_weights,
                                strides=[1, 1, 1, 1],
                                padding='VALID')
            conv = tf.nn.relu(tf.nn.bias_add(conv, biases), name='conv')

            # ksize=[batch_kernel_size, height_kernel, width_kernel, depth_kernel_size]; the first and fourth entries are usually 1.
            # The pooling window is set to the full post-convolution height and width, so the pooled result has
            # size 1 in the second and third dimensions, which simplifies the computation below.
            pool = tf.nn.max_pool(conv,
                                  ksize=[
                                      1, self.config.seq_length -
                                      self.config.kernel_size + 1, 1, 1
                                  ],
                                  strides=[1, 1, 1, 1],
                                  padding='VALID',
                                  name='pool')

            # Reshape the pooled result into a 2-D matrix; since the second and third dimensions are already 1, the final shape is [batch_size, num_filters]
            h = tf.reshape(pool, [-1, self.config.num_filters])
            w = tf.get_variable(
                name='w',
                shape=[self.config.num_filters, self.config.num_classes],
                initializer=tf.contrib.layers.xavier_initializer(),
                dtype=tf.float32)
            b = tf.get_variable(name='b',
                                shape=[self.config.num_classes],
                                initializer=tf.zeros_initializer(),
                                dtype=tf.float32)
            fc = tf.matmul(h, w) + b
            self.logits = tf.nn.dropout(fc, self.config.dropout_prob)

        with tf.name_scope('optimize_layer'):
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits,
                                                           labels=self.label))

            self.optimizer = tf.train.AdamOptimizer(
                self.config.learning_rate).minimize(self.loss)

        with tf.name_scope('score'):
            self.predict_label = tf.argmax(self.logits, 1)
            correct_pred = tf.equal(tf.argmax(self.logits, 1),
                                    tf.argmax(self.label, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
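
The shape comments above can be verified with a little arithmetic: a VALID convolution over the sequence, followed by a VALID max-pool whose window spans all remaining positions, leaves exactly one step, so the reshape to [batch_size, num_filters] loses nothing. With illustrative (hypothetical) config values:

seq_length, kernel_size, num_filters = 100, 5, 128
conv_height = seq_length - kernel_size + 1    # 96 positions after the VALID conv
pool_window = seq_length - kernel_size + 1    # the max_pool ksize from the code above
pool_height = conv_height - pool_window + 1   # = 1, so the pooling is global over time
# pooled shape: [batch_size, 1, 1, num_filters] -> reshape to [batch_size, num_filters]
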
Esempio n. 45
            pool5 = tf.layers.dropout(pool5, rate=pooldropout_rate, seed=115, training=training)

    # Flatten output
    with tf.name_scope('flatten') as scope:
        flat_output = tf.contrib.layers.flatten(pool5)

        # dropout at fc rate
        flat_output = tf.layers.dropout(flat_output, rate=fcdropout_rate, seed=116, training=training)

    # Fully connected layer 1
    with tf.name_scope('fc1') as scope:
        fc1 = tf.layers.dense(
            flat_output,
            2048,
            activation=None,
            kernel_initializer=tf.variance_scaling_initializer(scale=2, seed=117),
            bias_initializer=tf.zeros_initializer(),
            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=lamF),
            name="fc1"
        )

        bn_fc1 = tf.layers.batch_normalization(
            fc1,
            axis=-1,
            momentum=0.9,
            epsilon=epsilon,
            center=True,
            scale=True,
            beta_initializer=tf.zeros_initializer(),
            gamma_initializer=tf.ones_initializer(),
            moving_mean_initializer=tf.zeros_initializer(),
Esempio n. 46
    def build_predict(self,
                      inputs,
                      reverse_preds=None,
                      embed_penultimate=False,
                      target_subset=None,
                      save_reprs=False):
        """Construct per-location real-valued predictions."""
        assert inputs is not None
        print('Targets pooled by %d to length %d' %
              (self.hp.target_pool, self.hp.seq_length // self.hp.target_pool))

        if self.hp.augment_mutation > 0:
            # sample mutation binary mask across sequences
            mut_mask_probs = self.hp.augment_mutation * np.ones(
                (self.hp.seq_length, 1))
            mut_mask_dist = tfp.distributions.Bernoulli(probs=mut_mask_probs,
                                                        dtype=tf.float32)
            mut_mask = mut_mask_dist.sample(tf.shape(inputs)[0])

            # sample random nucleotide for mutations
            mut_1hot_probs = 0.25 * np.ones((self.hp.seq_length, 4))
            mut_1hot_dist = tfp.distributions.OneHotCategorical(
                probs=mut_1hot_probs, dtype=tf.float32)
            mut_1hot = mut_1hot_dist.sample(tf.shape(inputs)[0])

            # modify sequence
            inputs_mut = inputs - mut_mask * inputs + mut_mask * mut_1hot
            inputs = tf.cond(self.is_training, lambda: inputs_mut,
                             lambda: inputs)

        ###################################################
        # convolution layers
        ###################################################
        filter_weights = []
        layer_reprs = [inputs]

        seqs_repr = inputs
        for layer_index in range(self.hp.cnn_layers):
            with tf.variable_scope('cnn%d' % layer_index, reuse=tf.AUTO_REUSE):
                # convolution block
                args_for_block = self._make_conv_block_args(
                    layer_index, layer_reprs)
                seqs_repr = layers.conv_block(seqs_repr=seqs_repr,
                                              **args_for_block)

                # save representation
                layer_reprs.append(seqs_repr)

        if save_reprs:
            self.layer_reprs = layer_reprs

        # final nonlinearity
        if self.hp.nonlinearity == 'relu':
            seqs_repr = tf.nn.relu(seqs_repr)
        elif self.hp.nonlinearity == 'gelu':
            seqs_repr = tf.nn.sigmoid(1.702 * seqs_repr) * seqs_repr
        else:
            print('Unrecognized nonlinearity "%s"' % self.hp.nonlinearity,
                  file=sys.stderr)
            exit(1)

        ###################################################
        # slice out side buffer
        ###################################################

        # update batch buffer to reflect pooling
        seq_length = seqs_repr.shape[1].value
        pool_preds = self.hp.seq_length // seq_length
        assert self.hp.batch_buffer % pool_preds == 0, (
            'batch_buffer %d not divisible'
            ' by the CNN pooling %d') % (self.hp.batch_buffer, pool_preds)
        batch_buffer_pool = self.hp.batch_buffer // pool_preds

        # slice out buffer
        seq_length = seqs_repr.shape[1]
        seqs_repr = seqs_repr[:, batch_buffer_pool:seq_length -
                              batch_buffer_pool, :]
        seq_length = seqs_repr.shape[1]

        ###################################################
        # final layer
        ###################################################
        if embed_penultimate:
            final_repr = seqs_repr
        else:
            with tf.variable_scope('final', reuse=tf.AUTO_REUSE):
                final_filters = self.hp.sum_targets * self.hp.target_classes
                final_repr = tf.layers.dense(
                    inputs=seqs_repr,
                    units=final_filters,
                    activation=None,
                    kernel_initializer=tf.variance_scaling_initializer(
                        scale=2.0, mode='fan_in'),
                    kernel_regularizer=tf.contrib.layers.l1_regularizer(
                        self.hp.final_l1_scale))
                print('Convolution w/ %d %dx1 filters to final targets' % \
                      (final_filters, seqs_repr.shape[2]))

                if target_subset is not None:
                    # get convolution parameters
                    filters_full = tf.get_collection(
                        tf.GraphKeys.GLOBAL_VARIABLES, 'final/dense/kernel')[0]
                    bias_full = tf.get_collection(
                        tf.GraphKeys.GLOBAL_VARIABLES, 'final/dense/bias')[0]

                    # subset to specific targets
                    filters_subset = tf.gather(filters_full,
                                               target_subset,
                                               axis=1)
                    bias_subset = tf.gather(bias_full, target_subset, axis=0)

                    # substitute a new limited convolution
                    final_repr = tf.tensordot(seqs_repr, filters_subset, 1)
                    final_repr = tf.nn.bias_add(final_repr, bias_subset)

                    # update # targets
                    self.hp.sum_targets = len(target_subset)

                # expand length back out
                if self.hp.target_classes > 1:
                    final_repr = tf.reshape(
                        final_repr, (-1, seq_length, self.hp.sum_targets,
                                     self.hp.target_classes))

        # transform for reverse complement
        if reverse_preds is not None:
            final_repr = tf.cond(reverse_preds,
                                 lambda: tf.reverse(final_repr, axis=[1]),
                                 lambda: final_repr)

        ###################################################
        # link function
        ###################################################
        if embed_penultimate:
            predictions = final_repr
        else:
            # work-around for specifying my own predictions
            # self.preds_adhoc = tf.placeholder(
            #     tf.float32, shape=final_repr.shape, name='preds-adhoc')

            # float 32 exponential clip max
            exp_max = 50

            # choose link
            if self.hp.link in ['identity', 'linear']:
                predictions = tf.identity(final_repr, name='preds')

            elif self.hp.link == 'relu':
                predictions = tf.nn.relu(final_repr, name='preds')

            elif self.hp.link == 'exp':
                final_repr_clip = tf.clip_by_value(final_repr, -exp_max,
                                                   exp_max)
                predictions = tf.exp(final_repr_clip, name='preds')

            elif self.hp.link == 'exp_linear':
                predictions = tf.where(
                    final_repr > 0,
                    final_repr + 1,
                    tf.exp(tf.clip_by_value(final_repr, -exp_max, exp_max)),
                    name='preds')

            elif self.hp.link == 'softplus':
                final_repr_clip = tf.clip_by_value(final_repr, -exp_max, 10000)
                predictions = tf.nn.softplus(final_repr_clip, name='preds')

            else:
                print('Unknown link function %s' % self.hp.link,
                      file=sys.stderr)
                exit(1)

            # clip
            if self.hp.target_clip is not None:
                predictions = tf.clip_by_value(predictions, 0,
                                               self.hp.target_clip)

            # sqrt
            if self.hp.target_sqrt:
                predictions = tf.sqrt(predictions)

        return predictions
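
Among the link functions above, 'exp_linear' is the only piecewise one; it is continuous at zero because exp(0) = 1 = 0 + 1. A tiny NumPy sketch of the same function, outside the graph:

import numpy as np

def exp_linear(x, exp_max=50):
    # mirrors the tf.where branch above: x + 1 for x > 0, exp(x) otherwise
    return np.where(x > 0, x + 1, np.exp(np.clip(x, -exp_max, exp_max)))

print(exp_linear(np.array([-1.0, 0.0, 2.0])))  # [0.368 1.    3.   ]
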
Esempio n. 47
def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format):
    if strides > 1:
        inputs = fixed_padding(inputs, kernel_size, data_format)
    # variance_scaling_initializer by default draws from N(0, 1/sqrt(n));
    # for a kernel of shape (k, k, f_i, f_o), n = k * k * f_i
    return tf.layers.conv2d(
        inputs=inputs, filters=filters, kernel_size=kernel_size,
        strides=strides, padding=('SAME' if strides == 1 else 'VALID'),
        use_bias=False,
        kernel_initializer=tf.variance_scaling_initializer(),
        data_format=data_format)
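
`fixed_padding` is not shown in this snippet. In the reference ResNet code it pads explicitly, independent of input size, so that the subsequent strided convolution can use VALID padding; a sketch under that assumption:

def fixed_padding(inputs, kernel_size, data_format):
    # pad height and width by kernel_size - 1 in total, split front/back
    pad_total = kernel_size - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    if data_format == 'channels_first':
        return tf.pad(inputs, [[0, 0], [0, 0],
                               [pad_beg, pad_end], [pad_beg, pad_end]])
    return tf.pad(inputs, [[0, 0], [pad_beg, pad_end],
                           [pad_beg, pad_end], [0, 0]])
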
Esempio n. 48
    def _get_network(self, inputs, use_histograms=False):
        screen = pysc2_common_net_funcs.preprocess_state_input(
            inputs, self._config)

        with tf.variable_scope('shared_spatial_network'):
            shared_spatial_net = network_utils.get_layers(
                screen,
                self._config['network_structure']['shared_spatial_network'],
                self._config['network_structure']['default_activation'],
                self._training,
                use_histograms=use_histograms)

        if self._config['network_structure'][
                'scale_gradients_at_shared_spatial_split']:
            with tf.variable_scope('spatial_gradient_scale'):
                # scale because multiple action component streams are meeting here
                # (always one more branch than number of spatial components)
                spatial_count = 1
                for name, using in self._action_components.items():
                    if using and name in pysc2_common_net_funcs.spatial_components:
                        spatial_count += 1
                scale = 1 / spatial_count
                shared_spatial_net = (1 - scale) * tf.stop_gradient(
                    shared_spatial_net) + scale * shared_spatial_net

        if self._config['dueling_network']:
            with tf.variable_scope('dueling_gradient_scale'):
                # scale the gradients entering last shared layer, as in original Dueling DQN paper
                scale = 1 / math.sqrt(2)
                shared_spatial_net = (1 - scale) * tf.stop_gradient(
                    shared_spatial_net) + scale * shared_spatial_net

        # for dueling net, split here
        if self._config['dueling_network']:
            with tf.variable_scope('value_network'):
                fc_value = network_utils.get_layers(
                    shared_spatial_net,
                    self._config['network_structure']['value_network'],
                    self._config['network_structure']['default_activation'],
                    self._training,
                    use_histograms=use_histograms)
                value = tf.layers.dense(
                    fc_value,
                    1,
                    activation=None,
                    kernel_initializer=tf.variance_scaling_initializer(
                        scale=2.0),
                    name='value')
        else:
            # returning this from the function for debugging purposes, so need it to exist if not using dueling net
            value = None

        with tf.variable_scope('shared_non_spatial_network'):
            shared_non_spatial = network_utils.get_layers(
                shared_spatial_net,
                self._config['network_structure']
                ['shared_non_spatial_network'],
                self._config['network_structure']['default_activation'],
                self._training,
                use_histograms=use_histograms)

        if self._config['network_structure'][
                'scale_gradients_at_shared_non_spatial_split']:
            with tf.variable_scope('non_spatial_gradient_scale'):
                # scale because multiple action component streams are meeting here
                non_spatial_count = 0
                for name, using in self._action_components.items():
                    if using and name not in pysc2_common_net_funcs.spatial_components:
                        non_spatial_count += 1
                scale = 1 / non_spatial_count
                shared_non_spatial = (1 - scale) * tf.stop_gradient(
                    shared_non_spatial) + scale * shared_non_spatial

        num_options = pysc2_common_net_funcs.get_num_options_per_function(
            self._config)

        # create each component stream
        component_streams = {}
        # final q vals with value added
        action_q_vals = {}
        # if another stream requires the output of another stream
        component_one_hots_or_embeddings = {}
        for c in pysc2_common_net_funcs.component_order:
            # are we using this component?
            if self._action_components[c]:
                with tf.variable_scope(c + '_stream'):
                    stream_input = shared_non_spatial
                    if c in pysc2_common_net_funcs.spatial_components:
                        stream_input = shared_spatial_net

                    # optionally one stream of fully connected layers per component
                    spec = self._config['network_structure'][
                        'component_stream_default']
                    if c in self._config['network_structure'][
                            'component_stream_specs']:
                        spec = self._config['network_structure'][
                            'component_stream_specs'][c]

                    # optionally feed one hot OR embedded versions of earlier stream outputs to this stream
                    dependencies = None
                    if self._config['network_structure'][
                            'use_stream_outputs_as_inputs_to_other_streams']:
                        if c in self._config['network_structure'][
                                'stream_dependencies']:
                            dependencies = []
                            for d in self._config['network_structure'][
                                    'stream_dependencies'][c]:
                                dependencies.append(
                                    component_one_hots_or_embeddings[d])

                    component_stream = network_utils.get_layers(
                        stream_input,
                        spec,
                        self._config['network_structure']
                        ['default_activation'],
                        self._training,
                        dependencies,
                        use_histograms=use_histograms)

                    if c not in pysc2_common_net_funcs.spatial_components or self._config[
                            'network_structure'][
                                'end_spatial_streams_with_dense_instead_of_flatten']:
                        # make a dense layer with width equal to number of possible actions
                        dense = tf.layers.Dense(num_options[c], name=c)
                        component_streams[c] = dense(component_stream)
                        if self._use_histograms:
                            weights = dense.kernel
                            bias = dense.bias
                            name = 'final_dense_' + c + '_'
                            tf.summary.histogram(name + 'weights', weights)
                            tf.summary.histogram(name + 'bias', bias)
                    else:
                        # flatten a conv output
                        component_streams[c] = tf.reshape(component_stream,
                                                          [-1, num_options[c]],
                                                          name=c)
                if self._use_histograms:
                    tf.summary.histogram('advantage_' + c,
                                         component_streams[c])
                if self._config['dueling_network']:
                    # action_q_vals is A(s,a), value is V(s)
                    # Q(s,a) = V(s) + A(s,a) - 1/|A| * SUM_a(A(s,a))
                    with tf.variable_scope('q_vals'):
                        advantage = component_streams[c]
                        action_q_vals[c] = tf.add(
                            value,
                            (advantage -
                             tf.reduce_mean(advantage, axis=1, keepdims=True)),
                            name=c)

                else:
                    action_q_vals[c] = component_streams[c]

                # filter out actions ('function') that are illegal for this state
                if c == 'function':
                    with tf.variable_scope('available_actions_mask'):
                        # available actions mask; avoids using negative infinity, and is the right size
                        action_neg_inf_q_vals = action_q_vals[
                            'function'] * 0 - 1000000
                        action_q_vals['function'] = tf.where(
                            inputs['available_actions'],
                            action_q_vals['function'], action_neg_inf_q_vals)

                if self._config['network_structure'][
                        'use_stream_outputs_as_inputs_to_other_streams']:
                    with tf.variable_scope('stream_action_one_hot'):
                        found_dependency = False
                        for stream, dependencies in self._config[
                                'network_structure'][
                                    'stream_dependencies'].items():
                            if self._action_components[
                                    stream] and c in dependencies:
                                found_dependency = True
                                break
                        if found_dependency:

                            action_index = tf.math.argmax(action_q_vals[c],
                                                          axis=-1)
                            if c == 'screen':
                                # special handling for screen->screen2 only
                                action_one_hot = tf.one_hot(
                                    action_index, num_options[c])
                                action_one_hot = tf.reshape(
                                    action_one_hot, [
                                        -1, self._config['env']['screen_size'],
                                        self._config['env']['screen_size'], 1
                                    ])
                                component_one_hots_or_embeddings[
                                    c] = tf.stop_gradient(action_one_hot)
                            elif num_options[c] <= 10:
                                action_one_hot = tf.one_hot(
                                    action_index, num_options[c])
                                # argmax is already non-differentiable; stop_gradient makes that intent explicit
                                component_one_hots_or_embeddings[
                                    c] = tf.stop_gradient(action_one_hot)
                            else:
                                component_one_hots_or_embeddings[
                                    c] = tf.keras.layers.Embedding(
                                        input_dim=num_options[c],
                                        output_dim=math.ceil(num_options[c]**(
                                            1 / 4.0)))(action_index)

        # return action_q_vals
        return action_q_vals, value, component_streams
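
The dueling aggregation commented above, Q(s,a) = V(s) + A(s,a) - 1/|A| * SUM_a(A(s,a)), can be checked with toy numbers:

import numpy as np

value = 1.0
advantage = np.array([2.0, 0.0, -2.0])
q = value + (advantage - advantage.mean())  # -> [ 3.  1. -1.]
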
Esempio n. 49
    def __call__(self, inputs, training):

        inputs = tf.identity(inputs, 'model_inputs')
        print('===================== model inputs', inputs)

        with self._model_variable_scope():

            # init conv
            if self.bottleneck:
                init_channel_num = self.k * 2
            else:
                init_channel_num = 16
            inputs = tf.layers.conv2d(
                inputs=inputs,
                filters=init_channel_num,
                kernel_size=3,
                strides=1,
                padding='SAME',
                use_bias=False,
                kernel_initializer=tf.variance_scaling_initializer(),
                name='init_conv')

            if not self.bottleneck:
                with tf.variable_scope('stage1'):
                    for block_num in range(self.N):
                        inputs = _add_layer(self, inputs, block_num, training)
                    inputs = _add_transition(self, inputs, 'transition1',
                                             training)

                with tf.variable_scope('stage2'):
                    for block_num in range(self.N):
                        inputs = _add_layer(self, inputs, block_num, training)
                    inputs = _add_transition(self, inputs, 'transition2',
                                             training)

                with tf.variable_scope('stage3'):
                    for block_num in range(self.N):
                        inputs = _add_layer(self, inputs, block_num, training)

            if self.bottleneck:
                with tf.variable_scope('stage1'):
                    for block_num in range(self.N):
                        inputs = _add_bottleneck_layer(self, inputs, block_num,
                                                       training)
                    inputs = _add_transition(self, inputs, 'transition1',
                                             training)

                with tf.variable_scope('stage2'):
                    for block_num in range(self.N):
                        inputs = _add_bottleneck_layer(self, inputs, block_num,
                                                       training)
                    inputs = _add_transition(self, inputs, 'transition2',
                                             training)

                with tf.variable_scope('stage3'):
                    for block_num in range(self.N):
                        inputs = _add_bottleneck_layer(self, inputs, block_num,
                                                       training)

            inputs = batch_norm(inputs, training, name='bnlast')
            inputs = tf.nn.relu(inputs)

            # global avg pooling
            inputs = tf.reduce_mean(inputs, [1, 2],
                                    keepdims=True,
                                    name='final_reduce_mean')
            inputs = tf.squeeze(inputs, [1, 2])

            inputs = tf.layers.dense(inputs=inputs, units=self.num_classes)

            inputs = tf.identity(inputs, 'final_dense')
            print('===================== model outputs', inputs)

            return inputs
Esempio n. 50
 def conv(self,
          num_out_channels,
          k_height,
          k_width,
          d_height=1,
          d_width=1,
          mode='SAME',
          input_layer=None,
          num_channels_in=None,
          use_batch_norm=None,
          stddev=None,
          activation='relu',
          bias=0.0,
          kernel_initializer=None):
     """Construct a conv2d layer on top of cnn."""
     if input_layer is None:
         input_layer = self.top_layer
     if num_channels_in is None:
         num_channels_in = self.top_size
     if stddev is not None and kernel_initializer is None:
         kernel_initializer = tf.truncated_normal_initializer(stddev=stddev)
     if kernel_initializer is None:
         kernel_initializer = tf.variance_scaling_initializer()
     name = 'conv' + str(self.counts['conv'])
     self.counts['conv'] += 1
     with tf.variable_scope(name):
         strides = [1, d_height, d_width, 1]
         if self.data_format == 'NCHW':
             strides = [strides[0], strides[3], strides[1], strides[2]]
         if mode != 'SAME_RESNET':
             conv = self._conv2d_impl(input_layer,
                                      num_channels_in,
                                      num_out_channels,
                                      kernel_size=[k_height, k_width],
                                      strides=[d_height, d_width],
                                      padding=mode,
                                      kernel_initializer=kernel_initializer)
         else:  # Special padding mode for ResNet models
             if d_height == 1 and d_width == 1:
                 conv = self._conv2d_impl(
                     input_layer,
                     num_channels_in,
                     num_out_channels,
                     kernel_size=[k_height, k_width],
                     strides=[d_height, d_width],
                     padding='SAME',
                     kernel_initializer=kernel_initializer)
             else:
                 rate = 1  # Unused (for 'a trous' convolutions)
                 kernel_height_effective = k_height + (k_height -
                                                       1) * (rate - 1)
                 pad_h_beg = (kernel_height_effective - 1) // 2
                 pad_h_end = kernel_height_effective - 1 - pad_h_beg
                 kernel_width_effective = k_width + (k_width - 1) * (rate -
                                                                     1)
                 pad_w_beg = (kernel_width_effective - 1) // 2
                 pad_w_end = kernel_width_effective - 1 - pad_w_beg
                 padding = [[0, 0], [pad_h_beg, pad_h_end],
                            [pad_w_beg, pad_w_end], [0, 0]]
                 if self.data_format == 'NCHW':
                     padding = [
                         padding[0], padding[3], padding[1], padding[2]
                     ]
                 padded_input_layer = tf.pad(input_layer, padding)
                 conv = self._conv2d_impl(
                     padded_input_layer,
                     num_channels_in,
                     num_out_channels,
                     kernel_size=[k_height, k_width],
                     strides=[d_height, d_width],
                     padding='VALID',
                     kernel_initializer=kernel_initializer)
         if use_batch_norm is None:
             use_batch_norm = self.use_batch_norm
         mlperf.logger.log_conv2d(input_tensor=input_layer,
                                  output_tensor=conv,
                                  stride_height=d_height,
                                  stride_width=d_width,
                                  filters=num_out_channels,
                                  initializer=kernel_initializer,
                                  use_bias=not use_batch_norm
                                  and bias is not None)
         if not use_batch_norm:
             if bias is not None:
                 biases = self.get_variable(
                     'biases', [num_out_channels],
                     self.variable_dtype,
                     self.dtype,
                     initializer=tf.constant_initializer(bias))
                 biased = tf.reshape(
                     tf.nn.bias_add(conv,
                                    biases,
                                    data_format=self.data_format),
                     conv.get_shape())
             else:
                 biased = conv
         else:
             self.top_layer = conv
             self.top_size = num_out_channels
             biased = self.batch_norm(**self.batch_norm_config)
         if activation == 'relu':
             mlperf.logger.log(key=mlperf.tags.MODEL_HP_RELU)
             conv1 = self.relu(biased)
         elif activation == 'linear' or activation is None:
             conv1 = biased
         elif activation == 'tanh':
             conv1 = tf.nn.tanh(biased)
         else:
             raise KeyError('Invalid activation type \'%s\'' % activation)
         self.top_layer = conv1
         self.top_size = num_out_channels
         return conv1
Esempio n. 51
def q_network(state_tensor):
    inputs = state_tensor
    dense_outputs1 = tf.layers.dense(inputs=inputs, units=30, activation=tf.nn.relu, kernel_initializer=tf.variance_scaling_initializer())
    dense_outputs2 = tf.layers.dense(inputs=dense_outputs1, units=30, activation=tf.nn.relu, kernel_initializer=tf.variance_scaling_initializer())
    outputs = tf.layers.dense(inputs=dense_outputs2, units=n_outputs, kernel_initializer=tf.variance_scaling_initializer())
    return outputs
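
A minimal usage sketch for q_network above; TF 1.x graph mode, the state width, and n_outputs are assumptions, not part of the original snippet:

import tensorflow as tf

n_outputs = 4  # assumption: four discrete actions
state_tensor = tf.placeholder(tf.float32, shape=[None, 8], name='state')  # assumed state width
q_values = q_network(state_tensor)
greedy_action = tf.argmax(q_values, axis=1)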
Example no. 52
  def build_network(self, inputs, is_training):
    """Builds the forward pass of the model.

    Args:
      inputs: the list of inputs, excluding labels
      is_training: whether the network is in the training phase.

    Returns:
      The logits of the model.
    """

    def inception_v1(inputs, k, l, m, n, p, q):
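      # k/l/m/n/p/q follow the GoogLeNet notation: #1x1, #3x3-reduce, #3x3,
      # #5x5-reduce, #5x5, and pool-projection output channels, respectively.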
      cols = [[('conv', k, 1, 1)], [('conv', l, 1, 1), ('conv', m, 3, 3)],
              [('conv', n, 1, 1), ('conv', p, 5, 5)],
              [('mpool', 3, 3, 1, 1, 'SAME'), ('conv', q, 1, 1)]]
      return inception_module(inputs, cols, self.channel_pos)

    if self.data_format == 'NCHW':
      inputs = tf.transpose(inputs, [0, 3, 1, 2])

    conv1 = tf.layers.conv2d(
            inputs=inputs,
            filters=64,
            kernel_size=7,
            strides=2,
            padding='same',
            data_format=self.channel_pos,
            activation=tf.nn.relu,
            kernel_initializer=tf.variance_scaling_initializer(),
            bias_initializer=tf.constant_initializer(0.0)
            )
    pool1 = tf.layers.max_pooling2d(
            inputs=conv1,
            pool_size=3,
            strides=2,
            padding='same',
            data_format=self.channel_pos
            )
    conv2 = tf.layers.conv2d(
            inputs=pool1,
            filters=64,
            kernel_size=1,
            strides=1,
            padding='same',
            data_format=self.channel_pos,
            activation=tf.nn.relu,
            kernel_initializer=tf.variance_scaling_initializer(),
            bias_initializer=tf.constant_initializer(0.0)
            )
    conv3 = tf.layers.conv2d(
            inputs=conv2,
            filters=192,
            kernel_size=3,
            strides=1,
            padding='same',
            data_format=self.channel_pos,
            activation=tf.nn.relu,
            kernel_initializer=tf.variance_scaling_initializer(),
            bias_initializer=tf.constant_initializer(0.0)
            )
    pool3 = tf.layers.max_pooling2d(
            inputs=conv3,
            pool_size=3,
            strides=2,
            padding='same',
            data_format=self.channel_pos
            )
    output1 = inception_v1(pool3, 64, 96, 128, 16, 32, 32)
    output2 = inception_v1(output1, 128, 128, 192, 32, 96, 64)
    pool4 = tf.layers.max_pooling2d(
            inputs=output2,
            pool_size=3,
            strides=2,
            padding='same',
            data_format=self.channel_pos
            )
    output3 = inception_v1(pool4, 192, 96, 208, 16, 48, 64)
    output4 = inception_v1(output3, 160, 112, 224, 24, 64, 64)
    output5 = inception_v1(output4, 128, 128, 256, 24, 64, 64)
    output6 = inception_v1(output5, 112, 144, 288, 32, 64, 64)
    output7 = inception_v1(output6, 256, 160, 320, 32, 128, 128)
    pool8 = tf.layers.max_pooling2d(
            inputs=output7,
            pool_size=3,
            strides=2,
            padding='same',
            data_format=self.channel_pos
            )
    output9 = inception_v1(pool8, 256, 160, 320, 32, 128, 128)
    output10 = inception_v1(output9, 384, 192, 384, 48, 128, 128)
    pool11 = tf.layers.average_pooling2d(
            inputs=output10,
            pool_size=7,
            strides=1,
            padding='valid',
            data_format=self.channel_pos
            )
    output12 = tf.reshape(pool11, [-1, 1024])
    stddev = np.sqrt(1.0 / self.num_classes)
    logits = tf.contrib.layers.fully_connected(
            inputs=output12,
            num_outputs=self.num_classes,
            activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(stddev),
            biases_initializer=tf.constant_initializer(0.0)
            )
    return logits
Example no. 53
    def __init__(self, name, input_shape, output_dim, h_size=512, logdir=None):
        """A3C Network tensors and operations are defined here

        Args:
            name (str): The name of scope
            input_shape (list): The shape of input image [H, W, C]
            output_dim (int): Number of actions
            h_size (int): Number of filters in the final conv layer, which is
                split into the policy and value streams
            logdir (str, optional): directory to save summaries

        Notes:
            You should be familiar with Policy Gradients.
            The only difference between vanilla PG and A3C is that there is
            an operation to apply gradients manually.
        """
        self.h_size = h_size
        with tf.variable_scope(name):
            #The network receives a frame from the game, flattened into an array.
            #It is then resized and processed through four convolutional layers.
            self.stateInput = tf.placeholder(tf.float32,
                                             shape=[None, *input_shape],
                                             name='state')
            net = self.stateInput

            #init = tf.random_normal_initializer(mean=0.0, stddev=0.01, dtype=tf.float32)
            init = tf.variance_scaling_initializer(
                scale=2)  # He initialization
            net = tf.layers.conv2d(net,
                                   filters=32,
                                   kernel_size=8,
                                   strides=4,
                                   padding='valid',
                                   kernel_initializer=init,
                                   activation=tf.nn.relu)
            net = tf.layers.conv2d(net,
                                   filters=64,
                                   kernel_size=4,
                                   strides=2,
                                   padding='valid',
                                   kernel_initializer=init,
                                   activation=tf.nn.relu)
            net = tf.layers.conv2d(net,
                                   filters=64,
                                   kernel_size=3,
                                   strides=1,
                                   padding='valid',
                                   kernel_initializer=init,
                                   activation=tf.nn.relu)
            net = tf.layers.conv2d(net,
                                   filters=self.h_size,
                                   kernel_size=7,
                                   strides=1,
                                   padding='valid',
                                   kernel_initializer=init,
                                   activation=tf.nn.relu)

            #We take the output from the final convolutional layer and split it into separate advantage and value streams.

            self.streamAC, self.streamVC = tf.split(
                net, 2, 3)  # (N,1,1,512) --> (N,1,1,256), (N,1,1,256)

            self.streamAC = tf.layers.flatten(self.streamAC)
            self.Policy = tf.clip_by_value(
                tf.layers.dense(self.streamAC,
                                output_dim,
                                use_bias=True,
                                activation=tf.nn.softmax,
                                kernel_initializer=init), 1e-10, 1.)
            self.predict = tf.argmax(self.Policy, 1)

            self.streamVC = tf.layers.flatten(self.streamVC)
            self.Value = tf.layers.dense(self.streamVC,
                                         1,
                                         use_bias=False,
                                         activation=None,
                                         kernel_initializer=init)
            self.Value = tf.squeeze(self.Value)

            #Below we define the A3C loss: a policy-gradient term, an entropy bonus,
            #and a squared-error value loss.
            self.action = tf.placeholder(shape=[None],
                                         dtype=tf.int32,
                                         name='action_input')
            self.actions_onehot = tf.one_hot(self.action,
                                             output_dim,
                                             dtype=tf.float32,
                                             name='action_onehot')
            self.advantage = tf.placeholder(tf.float32,
                                            shape=[None],
                                            name="advantage_input")
            self.reward = tf.placeholder(tf.float32,
                                         shape=[None],
                                         name="reward_input")

            policy_gain = tf.boolean_mask(self.Policy,
                                          tf.cast(self.actions_onehot, tf.bool))
            policy_gain = tf.log(policy_gain) * self.advantage
            policy_gain = tf.reduce_mean(policy_gain, name="policy_gain")

            entropy = -tf.reduce_sum(self.Policy * tf.log(self.Policy), 1)
            entropy = tf.reduce_mean(entropy)

            value_loss = tf.losses.mean_squared_error(self.Value,
                                                      self.reward,
                                                      scope="value_loss")

            # Be careful with the negative sign: the optimizer can only minimize,
            # so we negate the policy gain and the entropy bonus (which encourages exploration)

            self.total_loss = -policy_gain + 0.1 * value_loss - entropy * 0.02
            self.optimizer = tf.train.AdamOptimizer(learning_rate=0.00025)

        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     scope=name)
        self.gradients = self.optimizer.compute_gradients(
            self.total_loss, var_list)
        self.gradients_placeholders = []

        for grad, var in self.gradients:
            placeholder = tf.placeholder(var.dtype, shape=var.get_shape())
            placeholder = tf.clip_by_norm(placeholder, 40)
            self.gradients_placeholders.append((placeholder, var))
        self.apply_gradients = self.optimizer.apply_gradients(
            self.gradients_placeholders)

        if logdir:
            loss_summary = tf.summary.scalar("total_loss", self.total_loss)
            value_summary = tf.summary.histogram("values", self.Value)

            self.summary_op = tf.summary.merge([loss_summary, value_summary])
            self.summary_writer = tf.summary.FileWriter(logdir)
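
A sketch of one way these handles could be wired in a worker loop; local_net, global_net, sess, and feed are hypothetical names, and it relies on TF 1.x allowing intermediate tensors to be fed:

# Hypothetical A3C update step (illustrative names, not from the source).
grads = sess.run([grad for grad, _ in local_net.gradients], feed_dict=feed)
feed_grads = {ph: g for (ph, _), g in zip(global_net.gradients_placeholders, grads)}
sess.run(global_net.apply_gradients, feed_dict=feed_grads)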
Example no. 54
    def __init__(
        self,
        observation_spec,
        conv_layer_params=None,
        input_fc_layer_params=(75, 40),
        lstm_size=(40, ),
        output_fc_layer_params=(75, 40),
        activation_fn=tf.keras.activations.relu,
        name='LSTMEncodingNetwork',
    ):
        """Creates an instance of `LSTMEncodingNetwork`.

    Args:
      observation_spec: A nest of `tensor_spec.TensorSpec` representing the
        observations.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      input_fc_layer_params: Optional list of fully connected parameters, where
        each item is the number of units in the layer. These feed into the
        recurrent layer.
      lstm_size: An iterable of ints specifying the LSTM cell sizes to use.
      output_fc_layer_params: Optional list of fully connected parameters, where
        each item is the number of units in the layer. These are applied on top
        of the recurrent layer.
      activation_fn: Activation function, e.g. tf.keras.activations.relu.
      name: A string representing name of the network.
    """
        kernel_initializer = tf.variance_scaling_initializer(
            scale=2.0, mode='fan_in', distribution='truncated_normal')

        input_encoder = encoding_network.EncodingNetwork(
            observation_spec,
            conv_layer_params=conv_layer_params,
            fc_layer_params=input_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer)

        # Create RNN cell
        if len(lstm_size) == 1:
            cell = tf.keras.layers.LSTMCell(lstm_size[0])
        else:
            cell = tf.keras.layers.StackedRNNCells(
                [tf.keras.layers.LSTMCell(size) for size in lstm_size])

        output_encoder = ([
            tf.keras.layers.Dense(num_units,
                                  activation=activation_fn,
                                  kernel_initializer=kernel_initializer,
                                  name='/'.join([name, 'dense']))
            for num_units in output_fc_layer_params
        ])

        state_spec = nest.map_structure(
            functools.partial(tensor_spec.TensorSpec,
                              dtype=tf.float32,
                              name='network_state_spec'), cell.state_size)

        super(LSTMEncodingNetwork,
              self).__init__(observation_spec=observation_spec,
                             action_spec=None,
                             state_spec=state_spec,
                             name=name)

        self._conv_layer_params = conv_layer_params
        self._input_encoder = input_encoder
        self._dynamic_unroll = dynamic_unroll_layer.DynamicUnroll(cell)
        self._output_encoder = output_encoder
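
A hedged instantiation sketch; the observation shape and layer sizes are assumptions, and tensor_spec is the same module the class itself uses:

import tensorflow as tf

obs_spec = tensor_spec.TensorSpec([10], tf.float32, name='observation')  # assumed flat observation
net = LSTMEncodingNetwork(obs_spec, input_fc_layer_params=(75, 40), lstm_size=(40,))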
Example no. 55
    def inference(self, inputs, trainable=True):
        with tf.name_scope('norm'):
            inputs = tf.div(tf.cast(inputs, tf.float32), 255.0)
            images = tf.split(inputs, [1, 1, 1, 1], axis=3)
            for i in range(4):
                tf.summary.image('input_images_%d' % i, images[i])

        def act(inputs):
            return tf.nn.leaky_relu(inputs, alpha=0.01)

        initializer = tf.variance_scaling_initializer()
        with tf.name_scope('conv1'):
            conv = tf.contrib.layers.conv2d(inputs,
                                            32,
                                            stride=4,
                                            kernel_size=8,
                                            activation_fn=act,
                                            trainable=trainable,
                                            padding='SAME',
                                            weights_initializer=initializer)

        with tf.name_scope('conv2'):
            conv = tf.contrib.layers.conv2d(conv,
                                            64,
                                            stride=2,
                                            kernel_size=4,
                                            activation_fn=act,
                                            trainable=trainable,
                                            padding='SAME',
                                            weights_initializer=initializer)

        with tf.name_scope('conv3'):
            conv = tf.contrib.layers.conv2d(conv,
                                            64,
                                            stride=1,
                                            kernel_size=3,
                                            activation_fn=act,
                                            trainable=trainable,
                                            padding='SAME',
                                            weights_initializer=initializer)

        with tf.name_scope('fully_connected'):
            flatten = tf.contrib.layers.flatten(conv)
            fc = tf.contrib.layers.fully_connected(
                flatten,
                512,
                trainable=trainable,
                activation_fn=act,
                weights_initializer=initializer)

        with tf.name_scope('output'):
            w = tf.get_variable('ow',
                                shape=[512, self.config.action_size],
                                trainable=trainable,
                                initializer=initializer)
            b = tf.get_variable('ob',
                                shape=[self.config.action_size],
                                trainable=trainable,
                                initializer=tf.zeros_initializer())
            outputs = tf.add(tf.matmul(fc, w), b, name='q_values')
        return outputs
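
A usage sketch under assumptions: 84x84 frames with exactly four stacked channels (the tf.split above requires four), and a hypothetical model instance wrapping this class:

frames = tf.placeholder(tf.uint8, shape=[None, 84, 84, 4], name='frames')
q_values = model.inference(frames)  # hypothetical instance of the class above
greedy_action = tf.argmax(q_values, axis=1)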
Example no. 56
def init_hidden_uniform():
    return tf.variance_scaling_initializer(scale=1.0 / 3.0,
                                           mode="fan_in",
                                           distribution="uniform")
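
With scale=1/3, mode="fan_in", and a uniform distribution, samples fall in [-limit, limit] where limit = sqrt(3 * scale / fan_in) = sqrt(1 / fan_in). A quick TF 1.x check (fan_in here is an arbitrary example value):

import numpy as np
import tensorflow as tf

fan_in = 64
init = init_hidden_uniform()
with tf.Session() as sess:
    w = sess.run(init([fan_in, 32]))
limit = np.sqrt(1.0 / fan_in)
assert -limit <= w.min() and w.max() <= limit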
Example no. 57
def fpn_model(features):
    """
    Args:
        features ([tf.Tensor]): ResNet features c2-c5

    Returns:
        [tf.Tensor]: FPN features p2-p6
    """
    assert len(features) == 4, features
    num_channel = cfg.FPN.NUM_CHANNEL

    use_gn = cfg.FPN.NORM == "GN"

    def upsample2x(name, x):
        return FixedUnPooling(
            name, x, 2, unpool_mat=np.ones((2, 2), dtype="float32"), data_format="channels_first"
        )

        # tf.image.resize is, again, not aligned.
        # with tf.name_scope(name):
        #     shape2d = tf.shape(x)[2:]
        #     x = tf.transpose(x, [0, 2, 3, 1])
        #     x = tf.image.resize_nearest_neighbor(x, shape2d * 2, align_corners=True)
        #     x = tf.transpose(x, [0, 3, 1, 2])
        #     return x

    with argscope(
        Conv2D,
        data_format="channels_first",
        activation=tf.identity,
        use_bias=True,
        kernel_initializer=tf.variance_scaling_initializer(scale=1.0),
    ):
        lat_2345 = [
            Conv2D("lateral_1x1_c{}".format(i + 2), c, num_channel, 1)
            for i, c in enumerate(features)
        ]
        if use_gn:
            lat_2345 = [GroupNorm("gn_c{}".format(i + 2), c) for i, c in enumerate(lat_2345)]
        lat_sum_5432 = []
        for idx, lat in enumerate(lat_2345[::-1]):
            if idx == 0:
                lat_sum_5432.append(lat)
            else:
                lat = lat + upsample2x("upsample_lat{}".format(6 - idx), lat_sum_5432[-1])
                lat_sum_5432.append(lat)
        p2345 = [
            Conv2D("posthoc_3x3_p{}".format(i + 2), c, num_channel, 3)
            for i, c in enumerate(lat_sum_5432[::-1])
        ]
        if use_gn:
            p2345 = [GroupNorm("gn_p{}".format(i + 2), c) for i, c in enumerate(p2345)]
        p6 = MaxPooling(
            "maxpool_p6",
            p2345[-1],
            pool_size=1,
            strides=2,
            data_format="channels_first",
            padding="VALID",
        )
        return p2345 + [p6]
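
A minimal call sketch; c2..c5 are hypothetical NCHW ResNet feature maps at strides 4, 8, 16, and 32:

p2, p3, p4, p5, p6 = fpn_model([c2, c3, c4, c5])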
Example no. 58
    def __call__(self,
                 inputs,
                 inputs_unpadded_length,
                 former_encoder_input,
                 multi_attention_bias,
                 targets=None):
        """Calculate target logits or inferred target sequences.

    Args:
      inputs: int tensor with shape [batch_size, input_length, hidden_size].
      inputs_unpadded_length: int tensor with shape [batch_size, ]. Indicate the actual length of each input.
      targets: None or int tensor with shape [batchCalculate target logits or inferred target sequences.

    Args:
      inputs: int tensor with shape [batch_size, input_length, hidden_size].
      inputs_unpadded_length: int tensor with shape [batch_size, ]. Indicate the actual length of each input.
      targets: None or int tensor with shape [batch_size, target_length].

    Returns:
      If targets is defined, then return logits for each word in the target
      sequence. float tensor with shape [batch_size, target_length, vocab_size]
      If target is none, then generate output sequence one token at a time.
        returns a dictionary {
          output: [batch_size, decoded length]
          score: [batch_size, float]}
    _size, target_length].

    Returns:
      If targets is defined, then return logits for each word in the target
      sequence. float tensor with shape [batch_size, target_length, vocab_size]
      If target is none, then generate output sequence one token at a time.
        returns a dictionary {
          output: [batch_size, decoded length]
          score: [batch_size, float]}
    """
        # Variance scaling is used here because it seems to work in many problems.
        # Other reasonable initializers may also work just as well.
        initializer = tf.variance_scaling_initializer(
            self.params["initializer_gain"],
            mode="fan_avg",
            distribution="uniform")
        with tf.variable_scope(self.scope, initializer=initializer):
            # Calculate attention bias for encoder self-attention and decoder
            # multi-headed attention layers.
            attention_bias = model_utils.get_padding_bias(
                inputs, inputs_unpadded_length)

            # Run the inputs through the encoder layer to map the symbol
            # representations to continuous representations.
            encoder_outputs = self.encode(inputs, attention_bias,
                                          inputs_unpadded_length,
                                          former_encoder_input,
                                          multi_attention_bias)

            # Generate output sequence if targets is None, or return logits if target
            # sequence is known.
            if targets is None:
                return self.predict(encoder_outputs, attention_bias)
            else:
                logits = self.decode(targets, encoder_outputs, attention_bias)
                return logits
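
A hedged call sketch based on the signature and docstring above; every tensor name here is a placeholder, and model is a hypothetical instance:

# Training mode: returns logits of shape [batch_size, target_length, vocab_size].
logits = model(inputs, inputs_unpadded_length, former_encoder_input,
               multi_attention_bias, targets=targets)
# Inference mode (targets=None): returns the {output, score} dictionary.
preds = model(inputs, inputs_unpadded_length, former_encoder_input,
              multi_attention_bias)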
Example no. 59
# Model architecture parameters
n_stocks = 500
n_neurons_1 = 1024
n_neurons_2 = 512
n_neurons_3 = 256
n_neurons_4 = 128
n_target = 1

# Placeholder
X = tf.placeholder(dtype=tf.float32, shape=[None, n_stocks])
Y = tf.placeholder(dtype=tf.float32, shape=[None])

# Initializers
sigma = 1
weight_initializer = tf.variance_scaling_initializer(mode="fan_avg",
                                                     distribution="uniform",
                                                     scale=sigma)
bias_initializer = tf.zeros_initializer()

# Layer 1: Variables for hidden weights and biases
W_hidden_1 = tf.Variable(weight_initializer([n_stocks, n_neurons_1]))
bias_hidden_1 = tf.Variable(bias_initializer([n_neurons_1]))

# Layer 2: Variables for hidden weights and biases
W_hidden_2 = tf.Variable(weight_initializer([n_neurons_1, n_neurons_2]))
bias_hidden_2 = tf.Variable(bias_initializer([n_neurons_2]))

# Layer 3: Variables for hidden weights and biases
W_hidden_3 = tf.Variable(weight_initializer([n_neurons_2, n_neurons_3]))
bias_hidden_3 = tf.Variable(bias_initializer([n_neurons_3]))
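
The excerpt stops after the layer-3 variables; a forward-pass sketch, assuming ReLU hidden activations (a common choice here, not stated in the excerpt):

hidden_1 = tf.nn.relu(tf.add(tf.matmul(X, W_hidden_1), bias_hidden_1))
hidden_2 = tf.nn.relu(tf.add(tf.matmul(hidden_1, W_hidden_2), bias_hidden_2))
hidden_3 = tf.nn.relu(tf.add(tf.matmul(hidden_2, W_hidden_3), bias_hidden_3))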
Example no. 60
def fpn_model(features):
    """
    Args:
        features ([tf.Tensor]): ResNet features c2-c5

    Returns:
        [tf.Tensor]: FPN features p2-p6
    """
    assert len(features) == 4, features
    num_channel = cfg.FPN.NUM_CHANNEL

    use_gn = cfg.FPN.NORM == 'GN'

    def upsample2x(name, x):
        try:
            resize = tf.compat.v2.image.resize_images
            with tf.name_scope(name):
                shp2d = tf.shape(x)[2:]
                x = tf.transpose(x, [0, 2, 3, 1])
                x = resize(x, shp2d * 2, 'nearest')
                x = tf.transpose(x, [0, 3, 1, 2])
                return x
        except AttributeError:
            return FixedUnPooling(name,
                                  x,
                                  2,
                                  unpool_mat=np.ones((2, 2), dtype='float32'),
                                  data_format='channels_first')

    with argscope(
            Conv2D,
            data_format='channels_first',
            activation=tf.identity,
            use_bias=True,
            kernel_initializer=tf.variance_scaling_initializer(scale=1.)):
        lat_2345 = [
            Conv2D('lateral_1x1_c{}'.format(i + 2), c, num_channel, 1)
            for i, c in enumerate(features)
        ]
        if use_gn:
            lat_2345 = [
                GroupNorm('gn_c{}'.format(i + 2), c)
                for i, c in enumerate(lat_2345)
            ]
        lat_sum_5432 = []
        for idx, lat in enumerate(lat_2345[::-1]):
            if idx == 0:
                lat_sum_5432.append(lat)
            else:
                lat = lat + upsample2x('upsample_lat{}'.format(6 - idx),
                                       lat_sum_5432[-1])
                lat_sum_5432.append(lat)
        p2345 = [
            Conv2D('posthoc_3x3_p{}'.format(i + 2), c, num_channel, 3)
            for i, c in enumerate(lat_sum_5432[::-1])
        ]
        if use_gn:
            p2345 = [
                GroupNorm('gn_p{}'.format(i + 2), c)
                for i, c in enumerate(p2345)
            ]
        p6 = MaxPooling('maxpool_p6',
                        p2345[-1],
                        pool_size=1,
                        strides=2,
                        data_format='channels_first',
                        padding='VALID')
        return p2345 + [p6]
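
For intuition, both upsampling paths in upsample2x amount to 2x nearest-neighbor replication; a NumPy sketch of the equivalence (illustrative only):

import numpy as np

x = np.arange(4, dtype='float32').reshape(1, 1, 2, 2)    # NCHW toy input
up = np.kron(x, np.ones((1, 1, 2, 2), dtype='float32'))  # each value tiled into a 2x2 block
print(up.shape)  # (1, 1, 4, 4)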