Example No. 1
def get_variable_initializer(hparams):
  """Get variable initializer from hparams."""
  if not hparams.initializer:
    return None

  mlperf_log.transformer_print(key=mlperf_log.MODEL_HP_INITIALIZER_GAIN,
                               value=hparams.initializer_gain,
                               hparams=hparams)

  if not tf.contrib.eager.in_eager_mode():
    tf.logging.info("Using variable initializer: %s", hparams.initializer)
  if hparams.initializer == "orthogonal":
    return tf.orthogonal_initializer(gain=hparams.initializer_gain)
  elif hparams.initializer == "uniform":
    max_val = 0.1 * hparams.initializer_gain
    return tf.random_uniform_initializer(-max_val, max_val)
  elif hparams.initializer == "normal_unit_scaling":
    return tf.variance_scaling_initializer(
        hparams.initializer_gain, mode="fan_avg", distribution="normal")
  elif hparams.initializer == "uniform_unit_scaling":
    return tf.variance_scaling_initializer(
        hparams.initializer_gain, mode="fan_avg", distribution="uniform")
  elif hparams.initializer == "xavier":
    return tf.contrib.layers.xavier_initializer()
  else:
    raise ValueError("Unrecognized initializer: %s" % hparams.initializer)
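A minimal usage sketch (assuming the tensor2tensor-style environment above, where mlperf_log is importable and hparams is a tf.contrib.training.HParams carrying the two fields this function reads):

hparams = tf.contrib.training.HParams(initializer="uniform_unit_scaling",
                                      initializer_gain=1.0)
init = get_variable_initializer(hparams)
# Variables created under this scope default to the chosen initializer.
with tf.variable_scope("body", initializer=init):
    w = tf.get_variable("w", shape=[512, 512])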
Example No. 2
def q_network(X_state, name):
    inputs = X_state
    with tf.variable_scope(name) as scope:
        dense_outputs = tf.layers.dense(inputs, 100, tf.nn.relu, kernel_initializer=tf.variance_scaling_initializer())
        outputs = tf.layers.dense(dense_outputs, n_outputs, kernel_initializer=tf.variance_scaling_initializer())
    trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope.name)
    trainable_vars_by_name = {var.name[len(scope.name):]: var for var in trainable_vars}
    return outputs, trainable_vars_by_name
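The name-keyed variable dict returned here is what makes target-network synchronization cheap in DQN. A hedged sketch of how the two copies might be wired together, reusing the X_state placeholder and n_outputs global assumed by the example:

online_q, online_vars = q_network(X_state, name="q_networks/online")
target_q, target_vars = q_network(X_state, name="q_networks/target")

# Match variables by their name suffix (the part after the scope prefix)
# and copy the online weights into the target network in one grouped op.
copy_ops = [target_var.assign(online_vars[var_name])
            for var_name, target_var in target_vars.items()]
copy_online_to_target = tf.group(*copy_ops)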
Example No. 3
def _get_variable_initializer(hparams):
  if hparams.initializer == "orthogonal":
    return tf.orthogonal_initializer(gain=hparams.initializer_gain)
  elif hparams.initializer == "uniform":
    max_val = 0.1 * hparams.initializer_gain
    return tf.random_uniform_initializer(-max_val, max_val)
  elif hparams.initializer == "normal_unit_scaling":
    return tf.variance_scaling_initializer(
        hparams.initializer_gain, mode="fan_avg", distribution="normal")
  elif hparams.initializer == "uniform_unit_scaling":
    return tf.variance_scaling_initializer(
        hparams.initializer_gain, mode="fan_avg", distribution="uniform")
  else:
    raise ValueError("Unrecognized initializer: %s" % hparams.initializer)
Example No. 4
@contextmanager  # from contextlib: the function body below yields
def backbone_scope(freeze):
    """
    Args:
        freeze (bool): whether to freeze all the variables under the scope
    """
    def nonlin(x):
        x = get_norm()(x)
        return tf.nn.relu(x)

    with argscope([Conv2D, MaxPooling, BatchNorm], data_format='channels_first'), \
            argscope(Conv2D, use_bias=False, activation=nonlin,
                     kernel_initializer=tf.variance_scaling_initializer(
                         scale=2.0, mode='fan_out')), \
            ExitStack() as stack:
        if cfg.BACKBONE.NORM in ['FreezeBN', 'SyncBN']:
            if freeze or cfg.BACKBONE.NORM == 'FreezeBN':
                stack.enter_context(argscope(BatchNorm, training=False))
            else:
                stack.enter_context(argscope(
                    BatchNorm, sync_statistics='nccl' if cfg.TRAINER == 'replicated' else 'horovod'))

        if freeze:
            stack.enter_context(freeze_variables(stop_gradient=False, skip_collection=True))
        else:
            # the layers are not completely frozen, but we may want to freeze only the affine params
            if cfg.BACKBONE.FREEZE_AFFINE:
                stack.enter_context(custom_getter_scope(freeze_affine_getter))
        yield
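Because the body yields, backbone_scope only works as a context manager (hence the contextmanager decorator restored above). A hypothetical call site, assuming image is an NCHW tensor:

with backbone_scope(freeze=True):
    # Conv2D/BatchNorm created here pick up the argscope defaults,
    # e.g. a hypothetical ResNet stem:
    l = Conv2D('conv0', image, 64, 7, strides=2)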
Example No. 5
    def __init__(self,
                 name: str,
                 n_heads: int,
                 keys_encoder: Attendable,
                 values_encoder: Attendable = None,
                 dropout_keep_prob: float = 1.0,
                 reuse: ModelPart = None,
                 save_checkpoint: str = None,
                 load_checkpoint: str = None,
                 initializers: InitializerSpecs = None) -> None:
        check_argument_types()
        BaseAttention.__init__(self, name, reuse, save_checkpoint,
                               load_checkpoint, initializers)

        self.n_heads = n_heads
        self.dropout_keep_prob = dropout_keep_prob

        self.keys_encoder = keys_encoder

        if values_encoder is not None:
            self.values_encoder = values_encoder
        else:
            self.values_encoder = self.keys_encoder

        if self.n_heads <= 0:
            raise ValueError("Number of heads must be greater than zero.")

        if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
            raise ValueError("Dropout keep prob must be inside (0,1].")

        self._variable_scope.set_initializer(tf.variance_scaling_initializer(
            mode="fan_avg", distribution="uniform"))
Example No. 6
    def get_logits(self, image):
        gauss_init = tf.random_normal_initializer(stddev=0.01)
        with argscope(Conv2D,
                      kernel_initializer=tf.variance_scaling_initializer(scale=2.)), \
                argscope([Conv2D, FullyConnected], activation=tf.nn.relu), \
                argscope([Conv2D, MaxPooling], data_format='channels_last'):
            # necessary padding to get 55x55 after conv1
            image = tf.pad(image, [[0, 0], [2, 2], [2, 2], [0, 0]])
            l = Conv2D('conv1', image, filters=96, kernel_size=11, strides=4, padding='VALID')
            # size: 55
            visualize_conv1_weights(l.variables.W)
            l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm1')
            l = MaxPooling('pool1', l, 3, strides=2, padding='VALID')
            # 27
            l = Conv2D('conv2', l, filters=256, kernel_size=5, split=2)
            l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm2')
            l = MaxPooling('pool2', l, 3, strides=2, padding='VALID')
            # 13
            l = Conv2D('conv3', l, filters=384, kernel_size=3)
            l = Conv2D('conv4', l, filters=384, kernel_size=3, split=2)
            l = Conv2D('conv5', l, filters=256, kernel_size=3, split=2)
            l = MaxPooling('pool3', l, 3, strides=2, padding='VALID')

            l = FullyConnected('fc6', l, 4096,
                               kernel_initializer=gauss_init,
                               bias_initializer=tf.ones_initializer())
            l = Dropout(l, rate=0.5)
            l = FullyConnected('fc7', l, 4096, kernel_initializer=gauss_init)
            l = Dropout(l, rate=0.5)
        logits = FullyConnected('fc8', l, 1000, kernel_initializer=gauss_init)
        return logits
Example No. 7
    def build_graph(self, image, label):
        image = image_preprocess(image, bgr=True)
        image = tf.transpose(image, [0, 3, 1, 2])

        cfg = {
            18: ([2, 2, 2, 2], preresnet_basicblock),
            34: ([3, 4, 6, 3], preresnet_basicblock),
        }
        defs, block_func = cfg[DEPTH]

        with argscope(Conv2D, use_bias=False,
                      kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')), \
                argscope([Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm], data_format='channels_first'):
            convmaps = (LinearWrap(image)
                        .Conv2D('conv0', 64, 7, strides=2, activation=BNReLU)
                        .MaxPooling('pool0', 3, strides=2, padding='SAME')
                        .apply2(preresnet_group, 'group0', block_func, 64, defs[0], 1)
                        .apply2(preresnet_group, 'group1', block_func, 128, defs[1], 2)
                        .apply2(preresnet_group, 'group2', block_func, 256, defs[2], 2)
                        .apply2(preresnet_group, 'group3new', block_func, 512, defs[3], 1)())
            print(convmaps)
            convmaps = GlobalAvgPooling('gap', convmaps)
            logits = FullyConnected('linearnew', convmaps, 1000)

        loss = compute_loss_and_error(logits, label)
        wd_cost = regularize_cost('.*/W', l2_regularizer(1e-4), name='l2_regularize_loss')
        add_moving_summary(loss, wd_cost)
        return tf.add_n([loss, wd_cost], name='cost')
Example No. 8
def additive_attention(a, b, a_lengths, b_lengths, max_seq_len, hidden_units=150,
                       scope='additive-attention', reuse=False):
    """
    For sequences a and b of lengths a_lengths and b_lengths, computes an attention matrix attn,
    where attn(i, j) = dot(v, tanh(W*a_i + W*b_j)).  v is a learnable vector and W is a learnable
    matrix. The rows of attn are softmax normalized.

    Args:
        a: Input sequence a.  Tensor of shape [batch_size, max_seq_len, input_size].
        b: Input sequence b.  Tensor of shape [batch_size, max_seq_len, input_size].
        a_lengths: Lengths of sequences in a.  Tensor of shape [batch_size].
        b_lengths: Lengths of sequences in b.  Tensor of shape [batch_size].
        max_seq_len: Length of padded sequences a and b.  Integer.
        hidden_units: Number of hidden units.  Integer.

    Returns:
        Attention matrix.  Tensor of shape [batch_size, max_seq_len, max_seq_len].

    """
    with tf.variable_scope(scope, reuse=reuse):
        aW = time_distributed_dense_layer(a, hidden_units, bias=False, scope='dense', reuse=False)
        bW = time_distributed_dense_layer(b, hidden_units, bias=False, scope='dense', reuse=True)
        aW = tf.expand_dims(aW, 2)
        bW = tf.expand_dims(bW, 1)
        v = tf.get_variable(
            name='dot_weights',
            initializer=tf.variance_scaling_initializer(),
            shape=[hidden_units]
        )
        logits = tf.einsum('ijkl,l->ijk', tf.nn.tanh(aW + bW), v)
        logits = logits - tf.expand_dims(tf.reduce_max(logits, axis=2), 2)
        attn = tf.exp(logits)
        attn = mask_attention_weights(attn, a_lengths, b_lengths, max_seq_len)
        return attn / tf.expand_dims(tf.reduce_sum(attn, axis=2) + 1e-10, 2)
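mask_attention_weights is assumed but not shown; a plausible sketch that zeroes attention weights beyond each sequence's true length before the final renormalization:

def mask_attention_weights(weights, a_lengths, b_lengths, max_seq_len):
    # weights: [batch_size, max_seq_len, max_seq_len]. Row i is valid only
    # for i < a_lengths; column j only for j < b_lengths.
    a_mask = tf.sequence_mask(a_lengths, max_seq_len)  # [batch_size, max_seq_len]
    b_mask = tf.sequence_mask(b_lengths, max_seq_len)  # [batch_size, max_seq_len]
    mask = tf.logical_and(tf.expand_dims(a_mask, 2), tf.expand_dims(b_mask, 1))
    return weights * tf.cast(mask, tf.float32)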
Example No. 9
def conv2d_fixed_padding(inputs,
                         filters,
                         kernel_size,
                         strides,
                         data_format="channels_first"):
  """Strided 2-D convolution with explicit padding.

  The padding is consistent and is based only on `kernel_size`, not on the
  dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).

  Args:
    inputs: `Tensor` of size `[batch, channels, height_in, width_in]`.
    filters: `int` number of filters in the convolution.
    kernel_size: `int` size of the kernel to be used in the convolution.
    strides: `int` strides of the convolution.
    data_format: `str` either "channels_first" for `[batch, channels, height,
        width]` or "channels_last" for `[batch, height, width, channels]`.

  Returns:
    A `Tensor` of shape `[batch, filters, height_out, width_out]`.
  """
  if strides > 1:
    inputs = fixed_padding(inputs, kernel_size, data_format=data_format)

  return tf.layers.conv2d(
      inputs=inputs,
      filters=filters,
      kernel_size=kernel_size,
      strides=strides,
      padding=("SAME" if strides == 1 else "VALID"),
      use_bias=False,
      kernel_initializer=tf.variance_scaling_initializer(),
      data_format=data_format)
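fixed_padding is referenced but not defined in this snippet; a minimal sketch consistent with the TF reference ResNet, padding by a total of kernel_size - 1 regardless of input size:

def fixed_padding(inputs, kernel_size, data_format="channels_first"):
    # Pad both spatial dimensions so a stride>1 VALID convolution behaves
    # like SAME padding computed from kernel_size alone.
    pad_total = kernel_size - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    if data_format == "channels_first":
        return tf.pad(inputs, [[0, 0], [0, 0],
                               [pad_beg, pad_end], [pad_beg, pad_end]])
    return tf.pad(inputs, [[0, 0], [pad_beg, pad_end],
                           [pad_beg, pad_end], [0, 0]])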
Example No. 10
  def __call__(self, inputs, targets=None):
    """Calculate target logits or inferred target sequences.

    Args:
      inputs: int tensor with shape [batch_size, input_length].
      targets: None or int tensor with shape [batch_size, target_length].

    Returns:
      If targets is defined, then return logits for each word in the target
      sequence: a float tensor with shape [batch_size, target_length, vocab_size].
      If targets is None, then generate the output sequence one token at a time
        and return a dictionary {
          output: [batch_size, decoded_length]
          score: [batch_size, float]}
    """
    # Variance scaling is used here because it seems to work in many problems.
    # Other reasonable initializers may also work just as well.
    initializer = tf.variance_scaling_initializer(
        self.params.initializer_gain, mode="fan_avg", distribution="uniform")
    with tf.variable_scope("Transformer", initializer=initializer):
      # Calculate attention bias for encoder self-attention and decoder
      # multi-headed attention layers.
      attention_bias = model_utils.get_padding_bias(inputs)

      # Run the inputs through the encoder layer to map the symbol
      # representations to continuous representations.
      encoder_outputs = self.encode(inputs, attention_bias)

      # Generate output sequence if targets is None, or return logits if target
      # sequence is known.
      if targets is None:
        return self.predict(encoder_outputs, attention_bias)
      else:
        logits = self.decode(targets, encoder_outputs, attention_bias)
        return logits
Example No. 11
  def _fully_connected(self, x, out_dim):
    """Fully connected layer: He-uniform weights, zero-initialized biases."""
    w = tf.get_variable(
        'DW', [x.get_shape()[1], out_dim],
        initializer=tf.variance_scaling_initializer(distribution='uniform'))
    b = tf.get_variable(
        'biases', [out_dim], initializer=tf.constant_initializer())
    return tf.nn.xw_plus_b(x, w, b)
Example No. 12
    def output(self) -> tf.Tensor:
        pooled_outputs = []
        for filter_size, num_filters in self.filters:
            with tf.variable_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, self.embedding_size, num_filters]
                w_filter = get_variable(
                    "conv_W", filter_shape,
                    initializer=tf.variance_scaling_initializer(
                        mode="fan_avg", distribution="uniform"))
                b_filter = get_variable(
                    "conv_bias", [num_filters],
                    initializer=tf.zeros_initializer())
                conv = tf.nn.conv1d(
                    self.embedded_inputs,
                    w_filter,
                    stride=1,
                    padding="VALID",
                    name="conv")

                # Apply nonlinearity
                conv_relu = tf.nn.relu(tf.nn.bias_add(conv, b_filter))

                # Max-pooling over the outputs
                pooled = tf.reduce_max(conv_relu, 1)
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        return tf.concat(pooled_outputs, axis=1)
Example No. 13
    def build_graph(self, image, label):
        assert tf.test.is_gpu_available()

        MEAN_IMAGE = tf.constant([0.4914, 0.4822, 0.4465], dtype=tf.float32)
        STD_IMAGE = tf.constant([0.2023, 0.1994, 0.2010], dtype=tf.float32)
        image = ((image / 255.0) - MEAN_IMAGE) / STD_IMAGE
        image = tf.transpose(image, [0, 3, 1, 2])

        pytorch_default_init = tf.variance_scaling_initializer(scale=1.0 / 3, mode='fan_in', distribution='uniform')
        with argscope([Conv2D, BatchNorm, GlobalAvgPooling], data_format='channels_first'), \
                argscope(Conv2D, kernel_initializer=pytorch_default_init):
            net = Conv2D('conv0', image, 64, kernel_size=3, strides=1, use_bias=False)
            for i, blocks_in_module in enumerate(MODULE_SIZES):
                for j in range(blocks_in_module):
                    stride = 2 if j == 0 and i > 0 else 1
                    with tf.variable_scope("res%d.%d" % (i, j)):
                        net = preactivation_block(net, FILTER_SIZES[i], stride)
            net = GlobalAvgPooling('gap', net)
            logits = FullyConnected('linear', net, CLASS_NUM,
                                    kernel_initializer=tf.random_normal_initializer(stddev=1e-3))

        ce_cost = tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=logits)
        ce_cost = tf.reduce_mean(ce_cost, name='cross_entropy_loss')

        single_label = tf.to_int32(tf.argmax(label, axis=1))
        wrong = tf.to_float(tf.logical_not(tf.nn.in_top_k(logits, single_label, 1)), name='wrong_vector')
        # monitor training error
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'), ce_cost)
        add_param_summary(('.*/W', ['histogram']))

        # weight decay on all W matrices, including convolutional layers
        wd_cost = tf.multiply(WEIGHT_DECAY, regularize_cost('.*', tf.nn.l2_loss), name='wd_cost')

        return tf.add_n([ce_cost, wd_cost], name='cost')
Example No. 14
def get_tf_initializer(name="glorot"):
    if name == "const":
        return tf.constant_initializer(0.3)
    elif name == "glorot":
        return tf.variance_scaling_initializer(
            scale=1.0, mode="fan_avg", distribution="normal")
    elif name == "normal":
        return tf.truncated_normal_initializer(dtype=tf.float32, stddev=0.36)
Example No. 15
def q_network(state_tensor):
    inputs = state_tensor
    conv_outputs1 = tf.layers.conv2d(inputs, filters=32, kernel_size=(8,8), strides=4, padding='same', activation=tf.nn.relu, kernel_initializer=tf.variance_scaling_initializer())
    conv_outputs2 = tf.layers.conv2d(conv_outputs1, filters=64, kernel_size=(4,4), strides=2, padding='same', activation=tf.nn.relu, kernel_initializer=tf.variance_scaling_initializer())
    conv_outputs3 = tf.layers.conv2d(conv_outputs2, filters=64, kernel_size=(3,3), strides=1, padding='same', activation=tf.nn.relu, kernel_initializer=tf.variance_scaling_initializer())
    flat_outputs = tf.reshape(conv_outputs3, shape=[-1, n_hidden_in])
    dense_outputs = tf.layers.dense(flat_outputs, n_hidden, activation=tf.nn.relu, kernel_initializer=tf.variance_scaling_initializer())
    outputs = tf.layers.dense(dense_outputs, n_outputs, kernel_initializer=tf.variance_scaling_initializer())
    return outputs
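n_hidden_in must equal the flattened size of conv_outputs3. For the classic 84x84 Atari input this architecture suggests, the stride-4, stride-2 and stride-1 'same' convolutions produce 21x21, 11x11 and 11x11 maps, so one consistent (hypothetical) set of globals would be:

n_hidden_in = 11 * 11 * 64  # 7744 for an 84x84 input; recompute for other sizes
n_hidden = 512
n_outputs = 4               # hypothetical: number of discrete actions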
Example No. 16
def Deconv2D(x, out_channel, kernel_shape,
             stride, padding='SAME',
             W_init=None, b_init=None,
             nl=tf.identity, use_bias=True,
             data_format='NHWC'):
    """
    2D deconvolution on 4D inputs.

    Args:
        x (tf.Tensor): a tensor of shape NHWC.
            Must have known number of channels, but can have other unknown dimensions.
        out_channel: number of output channels.
        kernel_shape: (h, w) tuple or an int.
        stride: (h, w) tuple or an int.
        padding (str): 'valid' or 'same'. Case insensitive.
        W_init: initializer for W. Defaults to `tf.variance_scaling_initializer(2.0)`, i.e. kaiming-normal.
        b_init: initializer for b. Defaults to zero.
        nl: a nonlinearity function.
        use_bias (bool): whether to use bias.

    Returns:
        tf.Tensor: an NHWC tensor named ``output`` with attribute `variables`.

    Variable Names:

    * ``W``: weights
    * ``b``: bias
    """
    in_shape = x.get_shape().as_list()
    channel_axis = 3 if data_format == 'NHWC' else 1
    in_channel = in_shape[channel_axis]
    assert in_channel is not None, "[Deconv2D] Input cannot have unknown channel!"

    assert isinstance(out_channel, int), out_channel

    if W_init is None:
        W_init = tf.variance_scaling_initializer(scale=2.0)
    if b_init is None:
        b_init = tf.constant_initializer()

    with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
        layer = tf.layers.Conv2DTranspose(
            out_channel, kernel_shape,
            strides=stride, padding=padding,
            data_format='channels_last' if data_format == 'NHWC' else 'channels_first',
            activation=lambda x: nl(x, name='output'),
            use_bias=use_bias,
            kernel_initializer=W_init,
            bias_initializer=b_init,
            trainable=True)
        ret = layer.apply(x, scope=tf.get_variable_scope())

    ret.variables = VariableHolder(W=layer.kernel)
    if use_bias:
        ret.variables.b = layer.bias
    return ret
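As written, the function is called positionally; in tensorpack the @layer_register decorator (not shown here) would also prepend a name argument. A hypothetical direct call that doubles the spatial resolution of a tensor x:

with tf.variable_scope('upsample'):
    y = Deconv2D(x, out_channel=256, kernel_shape=2, stride=2, nl=tf.nn.relu)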
Example No. 17
def get_variable_initializer(hparams):
  """Get variable initializer from hparams."""
  if not hparams.initializer:
    return None

  tf.logging.info("Using variable initializer: %s", hparams.initializer)
  if hparams.initializer == "orthogonal":
    return tf.orthogonal_initializer(gain=hparams.initializer_gain)
  elif hparams.initializer == "uniform":
    max_val = 0.1 * hparams.initializer_gain
    return tf.random_uniform_initializer(-max_val, max_val)
  elif hparams.initializer == "normal_unit_scaling":
    return tf.variance_scaling_initializer(
        hparams.initializer_gain, mode="fan_avg", distribution="normal")
  elif hparams.initializer == "uniform_unit_scaling":
    return tf.variance_scaling_initializer(
        hparams.initializer_gain, mode="fan_avg", distribution="uniform")
  else:
    raise ValueError("Unrecognized initializer: %s" % hparams.initializer)
Example No. 18
def fastrcnn_Xconv1fc_head(feature, num_classes, num_convs):
    """
    Args:
        feature (any shape):
        num_classes(int): num_category + 1
        num_convs (int): number of conv layers

    Returns:
        cls_logits (N x num_classes), reg_logits (N x (num_classes-1) x 4)
    """
    l = feature
    with argscope(Conv2D, data_format='channels_first',
                  kernel_initializer=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_out', distribution='normal')):
        for k in range(num_convs):
            l = Conv2D('conv{}'.format(k), l, cfg.FPN.FRCNN_CONV_HEAD_DIM, 3, activation=tf.nn.relu)
        l = FullyConnected('fc', l, cfg.FPN.FRCNN_FC_HEAD_DIM,
                           kernel_initializer=tf.variance_scaling_initializer(), activation=tf.nn.relu)
    return fastrcnn_outputs('outputs', l, num_classes)
Example No. 19
 def embedded_inputs(self) -> tf.Tensor:
     with tf.variable_scope("input_projection"):
         embedding_matrix = get_variable(
             "word_embeddings",
             [len(self.vocabulary), self.embedding_size],
             initializer=tf.variance_scaling_initializer(
                 mode="fan_avg", distribution="uniform"))
         return dropout(
             tf.nn.embedding_lookup(embedding_matrix, self.inputs),
             self.dropout_keep_prob,
             self.train_mode)
Example No. 20
def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format):
  """Strided 2-D convolution with explicit padding."""
  # The padding is consistent and is based only on `kernel_size`, not on the
  # dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).
  if strides > 1:
    inputs = fixed_padding(inputs, kernel_size, data_format)

  return tf.layers.conv2d(
      inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides,
      padding=('SAME' if strides == 1 else 'VALID'), use_bias=False,
      kernel_initializer=tf.variance_scaling_initializer(),
      data_format=data_format)
Example No. 21
    def modality_matrix(self) -> tf.Tensor:
        """Create an embedding matrix for varyining target modalities.

        Used to embed different target space modalities in the tensor2tensor
        models (e.g. during the zero-shot translation).
        """
        emb_size = self.input_sequence.temporal_states.shape.as_list()[-1]
        return get_variable(
            name="target_modality_embedding_matrix",
            shape=[32, emb_size],
            dtype=tf.float32,
            initializer=tf.variance_scaling_initializer(
                mode="fan_avg", distribution="uniform"))
Example No. 22
def fastrcnn_2fc_head(feature):
    """
    Args:
        feature (any shape):

    Returns:
        2D head feature
    """
    dim = cfg.FPN.FRCNN_FC_HEAD_DIM
    init = tf.variance_scaling_initializer()
    hidden = FullyConnected('fc6', feature, dim, kernel_initializer=init, activation=tf.nn.relu)
    hidden = FullyConnected('fc7', hidden, dim, kernel_initializer=init, activation=tf.nn.relu)
    return hidden
Example No. 23
def resnet_backbone(image, num_blocks, group_func, block_func):
    with argscope(Conv2D, nl=tf.identity, use_bias=False,
                  W_init=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        logits = (LinearWrap(image)
                  .Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
                  .MaxPooling('pool0', shape=3, stride=2, padding='SAME')
                  .apply(group_func, 'group0', block_func, 64, num_blocks[0], 1)
                  .apply(group_func, 'group1', block_func, 128, num_blocks[1], 2)
                  .apply(group_func, 'group2', block_func, 256, num_blocks[2], 2)
                  .apply(group_func, 'group3', block_func, 512, num_blocks[3], 2)
                  .GlobalAvgPooling('gap')
                  .FullyConnected('linear', 1000, nl=tf.identity)())
    return logits
Example No. 24
 def get_instance(args):
     """
     Create an instance of the initializer.
     """
     scale = float(args.get('scale', 1.0))
     mode = args.get('mode', "fan_in")
     assert (mode in ["fan_in", "fan_out", "fan_avg"])
     distribution = args.get('distribution', "normal")
     assert (distribution in ["normal", "uniform"])
     return tf.variance_scaling_initializer(scale,
                                            mode,
                                            distribution,
                                            seed=SEED)
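For reference, tf.variance_scaling_initializer keeps the weight variance near scale / n, where n is the fan-in, fan-out, or their average depending on mode: stddev ≈ sqrt(scale / n) for the (truncated) normal variant and limit = sqrt(3 * scale / n) for the uniform one. A quick numeric check of the He-normal case this factory produces with scale=2.0, mode='fan_in':

import math

fan_in = 512                      # e.g. input units of a dense layer
stddev = math.sqrt(2.0 / fan_in)  # He-normal stddev: exactly 0.0625 here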
Example No. 25
def fastrcnn_Xconv1fc_head(feature, num_convs, norm=None):
    """
    Args:
        feature (NCHW):
        num_convs (int): number of conv layers
        norm (str or None): either None or 'GN'

    Returns:
        2D head feature
    """
    assert norm in [None, 'GN'], norm
    l = feature
    with argscope(Conv2D, data_format='channels_first',
                  kernel_initializer=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_out', distribution='normal')):
        for k in range(num_convs):
            l = Conv2D('conv{}'.format(k), l, cfg.FPN.FRCNN_CONV_HEAD_DIM, 3, activation=tf.nn.relu)
            if norm is not None:
                l = GroupNorm('gn{}'.format(k), l)
        l = FullyConnected('fc', l, cfg.FPN.FRCNN_FC_HEAD_DIM,
                           kernel_initializer=tf.variance_scaling_initializer(), activation=tf.nn.relu)
    return l
Example No. 26
def fastrcnn_2fc_head(feature, num_classes):
    """
    Args:
        feature (any shape):
        num_classes(int): num_category + 1

    Returns:
        cls_logits (N x num_classes), reg_logits (N x (num_classes-1) x 4)
    """
    dim = cfg.FPN.FRCNN_FC_HEAD_DIM
    init = tf.variance_scaling_initializer()
    hidden = FullyConnected('fc6', feature, dim, kernel_initializer=init, activation=tf.nn.relu)
    hidden = FullyConnected('fc7', hidden, dim, kernel_initializer=init, activation=tf.nn.relu)
    return fastrcnn_outputs('outputs', hidden, num_classes)
Example No. 27
def resnet_backbone(image, num_blocks, group_func, block_func):
    with argscope(Conv2D, use_bias=False,
                  kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
        # Note that this pads the image by [2, 3] instead of [3, 2].
        # Similar things happen in later stride=2 layers as well.
        l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
        l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME')
        l = group_func('group0', l, block_func, 64, num_blocks[0], 1)
        l = group_func('group1', l, block_func, 128, num_blocks[1], 2)
        l = group_func('group2', l, block_func, 256, num_blocks[2], 2)
        l = group_func('group3', l, block_func, 512, num_blocks[3], 2)
        l = GlobalAvgPooling('gap', l)
        logits = FullyConnected('linear', l, 1000,
                                kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    return logits
Example No. 28
def maskrcnn_head(feature, num_class):
    """
    Args:
        feature (NxCx7x7):
        num_class (int): num_category + 1

    Returns:
        mask_logits (N x num_category x 14 x 14):
    """
    with argscope([Conv2D, Deconv2D], data_format='NCHW',
                  W_init=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_in', distribution='normal')):
        l = Deconv2D('deconv', feature, 256, 2, stride=2, nl=tf.nn.relu)
        l = Conv2D('conv', l, num_class - 1, 1)
    return l
Example No. 29
def DepthConv(x, out_channel, kernel_shape, padding='SAME', stride=1,
              W_init=None, activation=tf.identity):
    in_shape = x.get_shape().as_list()
    in_channel = in_shape[1]
    assert out_channel % in_channel == 0, (out_channel, in_channel)
    channel_mult = out_channel // in_channel

    if W_init is None:
        W_init = tf.variance_scaling_initializer(2.0)
    kernel_shape = [kernel_shape, kernel_shape]
    filter_shape = kernel_shape + [in_channel, channel_mult]

    W = tf.get_variable('W', filter_shape, initializer=W_init)
    conv = tf.nn.depthwise_conv2d(x, W, [1, 1, stride, stride], padding=padding, data_format='NCHW')
    return activation(conv, name='output')
Example No. 30
def conv2d_fixed_padding(**kwargs):
  """conv2d with fixed_padding, based only on kernel_size."""
  strides = kwargs["strides"]
  if strides > 1:
    kwargs["inputs"] = fixed_padding(kwargs["inputs"], kwargs["kernel_size"],
                                     kwargs["data_format"])

  defaults = {
      "padding": ("SAME" if strides == 1 else "VALID"),
      "use_bias": False,
      "kernel_initializer": tf.variance_scaling_initializer(),
  }
  defaults.update(kwargs)

  return tf.layers.conv2d(**defaults)
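Because defaults.update(kwargs) runs last, anything the caller passes overrides the defaults. For example, a hypothetical call swapping in a Glorot initializer for an input tensor x while keeping the fixed-padding behavior:

out = conv2d_fixed_padding(
    inputs=x, filters=64, kernel_size=3, strides=2,
    data_format="channels_last",
    kernel_initializer=tf.glorot_uniform_initializer())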
Example No. 31
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt

mnist = input_data.read_data_sets('data_MNIST/', one_hot=True)

input_node = 784
n_nodes_h1 = 621
n_nodes_h2 = 312
n_nodes_h3 = 128
n_nodes_h4 = 312
n_nodes_h5 = 621
n_classes = 784
batch_size = 128

scaler = tf.variance_scaling_initializer()

X = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32)

# noise matrix
mean = 0.9
stddev = 0.7
noise_global = np.random.normal(mean, stddev, 784)


def stacked_an(X):
    hidden_1_layer = {
        'weights':
        tf.Variable(scaler([input_node, n_nodes_h1], dtype=tf.float32)),
        'biases': tf.Variable(scaler([n_nodes_h1], dtype=tf.float32))
Example No. 32
def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format,
                         training, dropout, dropout_prob):
    """Strided 2-D convolution with explicit padding."""
    # The padding is consistent and is based only on `kernel_size`, not on the
    # dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).
    if strides > 1:
        inputs = fixed_padding(inputs, kernel_size, data_format)
    output = None
    if dropout != 'spiral':
        output = tf.layers.conv2d(
            inputs=inputs,
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=('SAME' if strides == 1 else 'VALID'),
            use_bias=False,
            kernel_initializer=tf.variance_scaling_initializer(),
            data_format=data_format)
    elif dropout == 'spiral':
        output = spiral_conv2d(
            inputs=inputs,
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=('SAME' if strides == 1 else 'VALID'),
            use_bias=False,
            kernel_initializer=tf.variance_scaling_initializer(),
            data_format=data_format,
            training=training,
            spiral_prob=dropout_prob)
        output_channel_last = tf.transpose(output, [0, 3, 2, 1])
        output_channel_last = DropBlock(keep_prob=dropout_prob,
                                        block_size=7)(output_channel_last,
                                                      training=training)
        return tf.transpose(output_channel_last, [0, 3, 2, 1],
                            name='dropblock_output')

    #if not training:
    #    dropout_prob = 1
    if dropout == 'dropout':
        print("Normal Dropout")
        output = Dropout(rate=(1 - dropout_prob))(output, training=training)

    elif dropout == 'spatial':
        print("spatial Dropout")
        output = SpatialDropout2D(rate=(1 - dropout_prob),
                                  data_format=data_format)(output,
                                                           training=training)
    elif dropout == 'dropblock3':
        print("DropBlock3")
        output = tf.identity(output, name='conv_output')
        output_channel_last = tf.transpose(output, [0, 3, 2, 1])
        output_channel_last = DropBlock(keep_prob=dropout_prob,
                                        block_size=3)(output_channel_last,
                                                      training=training)
        output = tf.transpose(output_channel_last, [0, 3, 2, 1],
                              name='dropblock_output')
    elif dropout == 'dropblock5':
        print("DropBlock5")
        output = tf.identity(output, name='conv_output')
        output_channel_last = tf.transpose(output, [0, 3, 2, 1])
        output_channel_last = DropBlock(keep_prob=dropout_prob,
                                        block_size=5)(output_channel_last,
                                                      training=training)
        output = tf.transpose(output_channel_last, [0, 3, 2, 1],
                              name='dropblock_output')
    elif dropout == 'dropblock7':
        print("DropBlock7")
        output = tf.identity(output, name='conv_output')
        output_channel_last = tf.transpose(output, [0, 3, 2, 1])
        output_channel_last = DropBlock(keep_prob=dropout_prob,
                                        block_size=7)(output_channel_last,
                                                      training=training)
        output = tf.transpose(output_channel_last, [0, 3, 2, 1],
                              name='dropblock_output')

    return output
Example No. 33
  def __init__(self,
               observation_spec,
               conv_layer_params=None,
               input_fc_layer_params=(75, 40),
               lstm_size=(40,),
               output_fc_layer_params=(75, 40),
               activation_fn=tf.keras.activations.relu,
               name='ValueRnnNetwork'):
    """Creates an instance of `ValueRnnNetwork`.

    Network supports calls with shape outer_rank + observation_spec.shape. Note
    outer_rank must be at least 1.

    Args:
      observation_spec: A nest of `tensor_spec.TensorSpec` representing the
        observations.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      input_fc_layer_params: Optional list of fully_connected parameters, where
        each item is the number of units in the layer. This is applied before
        the LSTM cell.
      lstm_size: An iterable of ints specifying the LSTM cell sizes to use.
      output_fc_layer_params: Optional list of fully_connected parameters, where
        each item is the number of units in the layer. This is applied after the
        LSTM cell.
      activation_fn: Activation function, e.g. tf.keras.activations.relu.
      name: A string representing name of the network.

    Raises:
      ValueError: If `observation_spec` contains more than one observation.
    """
    if len(nest.flatten(observation_spec)) > 1:
      raise ValueError(
          'Network only supports observation_specs with a single observation.')

    input_layers = utils.mlp_layers(
        conv_layer_params,
        input_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=tf.keras.initializers.glorot_uniform(),
        name='input_mlp')

    # Create RNN cell
    if len(lstm_size) == 1:
      cell = tf.keras.layers.LSTMCell(lstm_size[0])
    else:
      cell = tf.keras.layers.StackedRNNCells(
          [tf.keras.layers.LSTMCell(size) for size in lstm_size])

    state_spec = nest.map_structure(
        functools.partial(
            tensor_spec.TensorSpec, dtype=tf.float32,
            name='network_state_spec'), list(cell.state_size))

    output_layers = []
    if output_fc_layer_params:
      output_layers = [
          tf.keras.layers.Dense(
              num_units,
              activation=activation_fn,
              kernel_initializer=tf.variance_scaling_initializer(
                  scale=2.0, mode='fan_in', distribution='truncated_normal'),
              name='output/dense') for num_units in output_fc_layer_params
      ]

    value_projection_layer = keras_layers.Dense(
        1,
        activation=None,
        kernel_initializer=tf.random_uniform_initializer(
            minval=-0.03, maxval=0.03),
    )

    super(ValueRnnNetwork, self).__init__(
        observation_spec=observation_spec,
        action_spec=None,
        state_spec=state_spec,
        name=name)

    self._conv_layer_params = conv_layer_params
    self._input_layers = input_layers
    self._cell = cell
    self._output_layers = output_layers
    self._value_projection_layer = value_projection_layer
Example No. 34
def novel_fc(x, hidden_sizes, training=False, l = (1e-6, 1e-6, 1e-6), p = (0.5, 0.5, 0.5),\
             n_cosmo_params = 7, n_hod_params = 4):

    cosmo_sizes, hod_sizes, cap_sizes = hidden_sizes

    if type(l) is float:
        cosmo_l, hod_l, cap_l = l, l, l
    else:
        cosmo_l, hod_l, cap_l = l

    if type(p) is float:
        cosmo_p, hod_p, cap_p = p, p, p
    else:
        cosmo_p, hod_p, cap_p = p

    initializer = tf.variance_scaling_initializer(scale=2.0)

    # only for duplicating r
    n_params = n_cosmo_params + n_hod_params
    cosmo_x = tf.slice(x, [0, 0], [-1, n_cosmo_params])
    cosmo_x = tf.concat(
        values=[cosmo_x, tf.slice(x, [0, n_params - 1], [-1, -1])], axis=1)
    #print tf.shape(cosmo_x)
    #print tf.shape(tf.slice(x, [0, n_params-1], [-1, -1]))
    hod_x = tf.slice(x, [0, n_cosmo_params], [-1, -1])

    cosmo_regularizer = tf.contrib.layers.l1_regularizer(cosmo_l)
    cosmo_out = cosmo_x

    for size in cosmo_sizes:
        fc_output = tf.layers.dense(cosmo_out, size,
                                 kernel_initializer = initializer,\
                                    kernel_regularizer = cosmo_regularizer)
        bd_out = tf.layers.dropout(fc_output, cosmo_p, training=training)
        bn_out = tf.layers.batch_normalization(bd_out,
                                               axis=-1,
                                               training=training)
        cosmo_out = tf.nn.relu(bn_out)  #tf.nn.leaky_relu(bn_out, alpha=0.01)

    hod_regularizer = tf.contrib.layers.l1_regularizer(hod_l)
    hod_out = hod_x

    for size in hod_sizes:
        fc_output = tf.layers.dense(hod_out, size,
                                 kernel_initializer = initializer,\
                                    kernel_regularizer = hod_regularizer)
        bd_out = tf.layers.dropout(fc_output, hod_p, training=training)
        bn_out = tf.layers.batch_normalization(bd_out,
                                               axis=-1,
                                               training=training)
        hod_out = tf.nn.relu(bn_out)  #tf.nn.leaky_relu(bn_out, alpha=0.01)

    cap_out = tf.concat(values=[cosmo_out, hod_out], axis=1)
    cap_regularizer = tf.contrib.layers.l1_regularizer(cap_l)

    for size in cap_sizes:
        fc_output = tf.layers.dense(cap_out, size,
                                 kernel_initializer = initializer,\
                                    kernel_regularizer = cap_regularizer)
        bd_out = tf.layers.dropout(fc_output, cap_p, training=training)
        bn_out = tf.layers.batch_normalization(bd_out,
                                               axis=-1,
                                               training=training)
        cap_out = tf.nn.relu(bn_out)  #tf.nn.leaky_relu(bn_out, alpha=0.01)

    pred = tf.layers.dense(cap_out,
                           1,
                           kernel_initializer=initializer,
                           kernel_regularizer=cap_regularizer)[:, 0]  #,
    return pred
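The three-way split of hidden_sizes is easy to get wrong; a hypothetical call wiring separate towers for the 7 cosmology and 4 HOD parameters plus the combined "cap" tower:

pred = novel_fc(x, hidden_sizes=([128, 128], [64, 64], [256, 128]),
                training=True, l=(1e-6, 1e-6, 1e-6), p=(0.5, 0.5, 0.5))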
Example No. 35
def conv2d_fixed_padding(inputs,
                         filters,
                         kernel_size,
                         strides,
                         data_format="channels_first",
                         use_td=False,
                         targeting_rate=None,
                         keep_prob=None,
                         is_training=None):
  """Strided 2-D convolution with explicit padding.

  The padding is consistent and is based only on `kernel_size`, not on the
  dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).

  Args:
    inputs: `Tensor` of size `[batch, channels, height_in, width_in]`.
    filters: `int` number of filters in the convolution.
    kernel_size: `int` size of the kernel to be used in the convolution.
    strides: `int` strides of the convolution.
    data_format: `str` either "channels_first" for `[batch, channels, height,
        width]` or "channels_last" for `[batch, height, width, channels]`.
    use_td: `str` one of "weight" or "unit". Set to False or "" to disable
      targeted dropout.
    targeting_rate: `float` proportion of weights to target with targeted
      dropout.
    keep_prob: `float` keep probability for targeted dropout.
    is_training: `bool` for whether the model is in training.

  Returns:
    A `Tensor` of shape `[batch, filters, height_out, width_out]`.

  Raises:
    Exception: if use_td is not valid.
  """
  if strides > 1:
    inputs = fixed_padding(inputs, kernel_size, data_format=data_format)

  if use_td:
    inputs_shape = common_layers.shape_list(inputs)
    if use_td == "weight":
      if data_format == "channels_last":
        size = kernel_size * kernel_size * inputs_shape[-1]
      else:
        size = kernel_size * kernel_size * inputs_shape[1]
      targeting_count = targeting_rate * tf.to_float(size)
      targeting_fn = common_layers.weight_targeting
    elif use_td == "unit":
      targeting_count = targeting_rate * filters
      targeting_fn = common_layers.unit_targeting
    else:
      raise Exception("Unrecognized targeted dropout type: %s" % use_td)

    y = common_layers.td_conv(
        inputs,
        filters,
        kernel_size,
        targeting_count,
        targeting_fn,
        keep_prob,
        is_training,
        do_prune=True,
        strides=strides,
        padding=("SAME" if strides == 1 else "VALID"),
        data_format=data_format,
        use_bias=False,
        kernel_initializer=tf.variance_scaling_initializer())
  else:
    y = tf.layers.conv2d(
        inputs=inputs,
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=("SAME" if strides == 1 else "VALID"),
        use_bias=False,
        kernel_initializer=tf.variance_scaling_initializer(),
        data_format=data_format)

  return y
Example No. 36
batch_size = 500
print('n_epoch = ', n_epoch)
print('batch_size = ', batch_size)
print()

#%% Graph
tf.reset_default_graph()

X = tf.placeholder(tf.float32, shape=(None, n_input), name="X")
Y = tf.placeholder(tf.float32, shape=(None, n_output), name="Y")
training = tf.placeholder_with_default(False, shape=(), name='training')

batch_norm_momentum = 0.9

with tf.name_scope("dnn"):
    he_init = tf.variance_scaling_initializer()

    my_batch_norm_layer = partial(tf.layers.batch_normalization,
                                  training=training,
                                  momentum=batch_norm_momentum)

    my_dense_layer = partial(tf.layers.dense,
                             kernel_initializer=he_init,
                             activation=tf.nn.relu)

    hidden1 = my_dense_layer(X, n_hidden[0], name="hidden1")
    bn1 = tf.nn.relu(my_batch_norm_layer(hidden1))
    hidden2 = my_dense_layer(bn1, n_hidden[1], name="hidden2")
    bn2 = tf.nn.relu(my_batch_norm_layer(hidden2))
    outputs = tf.layers.dense(bn2,
                              n_output,
Example No. 37
    def _build_graph(self, inputs):

        images, truemap_coded = inputs

        orig_imgs = images

        true_np = truemap_coded[..., 0]
        true_np = tf.cast(true_np, tf.int32)
        true_np = tf.identity(true_np, name='truemap-np')
        one_np = tf.one_hot(true_np, 2, axis=-1)
        true_np = tf.expand_dims(true_np, axis=-1)

        true_dist = truemap_coded[..., 1:]
        true_dist = tf.identity(true_dist, name='truemap-dist')

        ####
        with argscope(Conv2D, activation=tf.identity, use_bias=False,  # Kaiming He init
                      W_init=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')), \
                argscope([Conv2D, BatchNorm], data_format=self.data_format):

            i = tf.transpose(images, [0, 3, 1, 2])
            i = i if not self.input_norm else i / 255.0

            ####
            d = encoder(i, self.freeze)
            d[0] = crop_op(d[0], (184, 184))
            d[1] = crop_op(d[1], (72, 72))

            ####
            np_feat = decoder('np', d)
            np = BNReLU('preact_out_np', np_feat[-1])

            dist_feat = decoder('dst', d)
            dist = BNReLU('preact_out_dist', dist_feat[-1])

            ####
            logi_np = Conv2D('conv_out_np',
                             np,
                             2,
                             1,
                             use_bias=True,
                             activation=tf.identity)
            logi_np = tf.transpose(logi_np, [0, 2, 3, 1])
            soft_np = tf.nn.softmax(logi_np, axis=-1)
            prob_np = tf.identity(soft_np[..., 1], name='predmap-prob-np')
            prob_np = tf.expand_dims(prob_np, axis=-1)
            pred_np = tf.argmax(soft_np, axis=-1, name='predmap-np')
            pred_np = tf.expand_dims(tf.cast(pred_np, tf.float32), axis=-1)

            ####
            logi_dist = Conv2D('conv_out_dist',
                               dist,
                               1,
                               1,
                               use_bias=True,
                               activation=tf.identity)
            logi_dist = tf.transpose(logi_dist, [0, 2, 3, 1])
            prob_dist = tf.identity(logi_dist, name='predmap-prob-dist')
            pred_dist = tf.identity(logi_dist, name='predmap-dist')

            # encoded so that inference can extract all output at once
            predmap_coded = tf.concat([prob_np, pred_dist],
                                      axis=-1,
                                      name='predmap-coded')
        ####

        ####
        if get_current_tower_context().is_training:
            ######## LOSS
            ### Distance regression loss
            loss_mse = pred_dist - true_dist
            loss_mse = loss_mse * loss_mse
            loss_mse = tf.reduce_mean(loss_mse, name='loss-mse')
            add_moving_summary(loss_mse)

            ### Nuclei Blob classification loss
            loss_bce = categorical_crossentropy(soft_np, one_np)
            loss_bce = tf.reduce_mean(loss_bce, name='loss-bce')
            add_moving_summary(loss_bce)

            ### combine the loss into single cost function
            self.cost = tf.identity(loss_mse + loss_bce, name='overall-loss')
            add_moving_summary(self.cost)
            ####

            add_param_summary(('.*/W', ['histogram']))  # monitor W

            #### log visual summaries
            orig_imgs = tf.cast(orig_imgs, tf.uint8)
            tf.summary.image('input', orig_imgs, max_outputs=1)

            orig_imgs = crop_op(orig_imgs, (190, 190), "NHWC")

            pred_np = colorize(prob_np[..., 0], cmap='jet')
            true_np = colorize(true_np[..., 0], cmap='jet')

            pred_dist = colorize(prob_dist[..., 0], cmap='jet')
            true_dist = colorize(true_dist[..., 0], cmap='jet')

            viz = tf.concat([
                orig_imgs,
                true_np,
                pred_np,
                true_dist,
                pred_dist,
            ], 2)

            tf.summary.image('output', viz, max_outputs=1)

        return
Example No. 38
    def _build_graph(self, inputs):

        images, truemap_coded = inputs
        orig_imgs = images

        if hasattr(self, 'type_classification') and self.type_classification:
            true_type = truemap_coded[..., 1]
            true_type = tf.cast(true_type, tf.int32)
            true_type = tf.identity(true_type, name='truemap-type')
            one_type = tf.one_hot(true_type, self.nr_types, axis=-1)
            true_type = tf.expand_dims(true_type, axis=-1)

            true_np = tf.cast(true_type > 0, tf.int32)  # TODO: sanity-check this
            true_np = tf.identity(true_np, name='truemap-np')
            one_np = tf.one_hot(tf.squeeze(true_np), 2, axis=-1)
        else:
            true_np = truemap_coded[..., 0]
            true_np = tf.cast(true_np, tf.int32)
            true_np = tf.identity(true_np, name='truemap-np')
            one_np = tf.one_hot(true_np, 2, axis=-1)
            true_np = tf.expand_dims(true_np, axis=-1)

        true_hv = truemap_coded[..., -2:]
        true_hv = tf.identity(true_hv, name='truemap-hv')

        ####
        with argscope(Conv2D, activation=tf.identity, use_bias=False,  # Kaiming He init
                      W_init=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')), \
                argscope([Conv2D, BatchNorm], data_format=self.data_format):

            i = tf.transpose(images, [0, 3, 1, 2])
            i = i if not self.input_norm else i / 255.0

            ####
            d = encoder(i, self.freeze)
            d[0] = crop_op(d[0], (184, 184))
            d[1] = crop_op(d[1], (72, 72))

            ####
            np_feat = decoder('np', d)
            npx = BNReLU('preact_out_np', np_feat[-1])

            hv_feat = decoder('hv', d)
            hv = BNReLU('preact_out_hv', hv_feat[-1])

            if self.type_classification:
                tp_feat = decoder('tp', d)
                tp = BNReLU('preact_out_tp', tp_feat[-1])

                # Nuclei Type Pixels (TP)
                logi_class = Conv2D('conv_out_tp',
                                    tp,
                                    self.nr_types,
                                    1,
                                    use_bias=True,
                                    activation=tf.identity)
                logi_class = tf.transpose(logi_class, [0, 2, 3, 1])
                soft_class = tf.nn.softmax(logi_class, axis=-1)

            #### Nuclei Pixels (NP)
            logi_np = Conv2D('conv_out_np',
                             npx,
                             2,
                             1,
                             use_bias=True,
                             activation=tf.identity)
            logi_np = tf.transpose(logi_np, [0, 2, 3, 1])
            soft_np = tf.nn.softmax(logi_np, axis=-1)
            prob_np = tf.identity(soft_np[..., 1], name='predmap-prob-np')
            prob_np = tf.expand_dims(prob_np, axis=-1)

            #### Horizontal-Vertical (HV)
            logi_hv = Conv2D('conv_out_hv',
                             hv,
                             2,
                             1,
                             use_bias=True,
                             activation=tf.identity)
            logi_hv = tf.transpose(logi_hv, [0, 2, 3, 1])
            prob_hv = tf.identity(logi_hv, name='predmap-prob-hv')
            pred_hv = tf.identity(logi_hv, name='predmap-hv')

            # * channel ordering: type-map, segmentation map
            # encoded so that inference can extract all output at once
            if self.type_classification:
                predmap_coded = tf.concat([soft_class, prob_np, pred_hv],
                                          axis=-1,
                                          name='predmap-coded')
            else:
                predmap_coded = tf.concat([prob_np, pred_hv],
                                          axis=-1,
                                          name='predmap-coded')
        ####
        def get_gradient_hv(l, h_ch, v_ch):
            """
            Calculate the horizontal partial derivative for the horizontal channel
            and the vertical partial derivative for the vertical channel.
            The partial derivative is approximated by the central difference,
            which is obtained with a Sobel kernel of size 5x5. The boundary is zero-padded
            when the channel is convolved with the Sobel kernel.
            Args:
                l (tensor): tensor of shape NHWC where C must be 2 (1 channel for horizontal
                            and 1 channel for vertical)
                h_ch(int) : index within C axis of `l` that corresponds to horizontal channel
                v_ch(int) : index within C axis of `l` that corresponds to vertical channel
            """
            def get_sobel_kernel(size):
                assert size % 2 == 1, 'Must be odd, got size=%d' % size

                h_range = np.arange(-size // 2 + 1,
                                    size // 2 + 1,
                                    dtype=np.float32)
                v_range = np.arange(-size // 2 + 1,
                                    size // 2 + 1,
                                    dtype=np.float32)
                h, v = np.meshgrid(h_range, v_range)
                kernel_h = h / (h * h + v * v + 1.0e-15)
                kernel_v = v / (h * h + v * v + 1.0e-15)
                return kernel_h, kernel_v

            mh, mv = get_sobel_kernel(5)
            mh = tf.constant(mh, dtype=tf.float32)
            mv = tf.constant(mv, dtype=tf.float32)

            mh = tf.reshape(mh, [5, 5, 1, 1])
            mv = tf.reshape(mv, [5, 5, 1, 1])

            # central difference to get gradient, ignore the boundary problem
            h = tf.expand_dims(l[..., h_ch], axis=-1)
            v = tf.expand_dims(l[..., v_ch], axis=-1)
            dh = tf.nn.conv2d(h, mh, strides=[1, 1, 1, 1], padding='SAME')
            dv = tf.nn.conv2d(v, mv, strides=[1, 1, 1, 1], padding='SAME')
            output = tf.concat([dh, dv], axis=-1)
            return output

        def loss_mse(true, pred, name=None):
            ### regression loss
            loss = pred - true
            loss = tf.reduce_mean(loss * loss, name=name)
            return loss

        def loss_msge(true, pred, focus, name=None):
            focus = tf.stack([focus, focus], axis=-1)
            pred_grad = get_gradient_hv(pred, 1, 0)
            true_grad = get_gradient_hv(true, 1, 0)
            loss = pred_grad - true_grad
            loss = focus * (loss * loss)
            # artificial reduce_mean with focus region
            loss = tf.reduce_sum(loss) / (tf.reduce_sum(focus) + 1.0e-8)
            loss = tf.identity(loss, name=name)
            return loss

        ####
        if get_current_tower_context().is_training:
            #---- LOSS ----#
            loss = 0
            for term, weight in self.loss_term.items():
                if term == 'mse':
                    term_loss = loss_mse(true_hv, pred_hv, name='loss-mse')
                elif term == 'msge':
                    focus = truemap_coded[..., 0]
                    term_loss = loss_msge(true_hv,
                                          pred_hv,
                                          focus,
                                          name='loss-msge')
                elif term == 'bce':
                    term_loss = categorical_crossentropy(soft_np, one_np)
                    term_loss = tf.reduce_mean(term_loss, name='loss-bce')
                elif term == 'dice':
                    term_loss = dice_loss(soft_np[...,0], one_np[...,0]) \
                              + dice_loss(soft_np[...,1], one_np[...,1])
                    term_loss = tf.identity(term_loss, name='loss-dice')
                else:
                    assert False, 'Unsupported loss term: %s' % term
                add_moving_summary(term_loss)
                loss += term_loss * weight

            if self.type_classification:
                term_loss = categorical_crossentropy(soft_class, one_type)
                term_loss = tf.reduce_mean(term_loss,
                                           name='loss-xentropy-class')
                add_moving_summary(term_loss)
                loss = loss + term_loss

                # term_loss = dice_loss(soft_class[...,0], one_type[...,0]) \
                #           + dice_loss(soft_class[...,1], one_type[...,1]) \
                #           + dice_loss(soft_class[...,2], one_type[...,2]) \
                #           + dice_loss(soft_class[...,3], one_type[...,3]) \
                #           + dice_loss(soft_class[...,4], one_type[...,4])

                term_loss = 0
                for type_id in range(self.nr_types):
                    term_loss += dice_loss(soft_class[..., type_id],
                                           one_type[..., type_id])

                term_loss = tf.identity(term_loss, name='loss-dice-class')
                add_moving_summary(term_loss)
                loss = loss + term_loss

            ### combine the losses into a single cost function
            self.cost = tf.identity(loss, name='overall-loss')
            add_moving_summary(self.cost)
            ####

            add_param_summary(('.*/W', ['histogram']))  # monitor W

            ### log visualizations
            orig_imgs = tf.cast(orig_imgs, tf.uint8)
            tf.summary.image('input', orig_imgs, max_outputs=1)

            orig_imgs = crop_op(orig_imgs, (190, 190), "NHWC")

            pred_np = colorize(prob_np[..., 0], cmap='jet')
            true_np = colorize(true_np[..., 0], cmap='jet')

            pred_h = colorize(prob_hv[..., 0], vmin=-1, vmax=1, cmap='jet')
            pred_v = colorize(prob_hv[..., 1], vmin=-1, vmax=1, cmap='jet')
            true_h = colorize(true_hv[..., 0], vmin=-1, vmax=1, cmap='jet')
            true_v = colorize(true_hv[..., 1], vmin=-1, vmax=1, cmap='jet')

            if not self.type_classification:
                viz = tf.concat([
                    orig_imgs, pred_h, pred_v, pred_np, true_h, true_v, true_np
                ], 2)
            else:
                pred_type = tf.transpose(soft_class, (0, 1, 3, 2))
                pred_type = tf.reshape(pred_type, [-1, 80, 80 * self.nr_types])
                true_type = tf.cast(true_type[..., 0] / self.nr_classes,
                                    tf.float32)
                true_type = colorize(true_type, vmin=0, vmax=1, cmap='jet')
                pred_type = colorize(pred_type, vmin=0, vmax=1, cmap='jet')

                viz = tf.concat([
                    orig_imgs,
                    pred_h,
                    pred_v,
                    pred_np,
                    pred_type,
                    true_h,
                    true_v,
                    true_np,
                    true_type,
                ], 2)

            viz = tf.concat([viz[0], viz[-1]], axis=0)
            viz = tf.expand_dims(viz, axis=0)
            tf.summary.image('output', viz, max_outputs=1)

        return
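
The h / (h*h + v*v) construction in get_sobel_kernel above is worth a sanity check. A standalone NumPy sketch (not part of the original model) confirming that the horizontal kernel is antisymmetric left-to-right and that the vertical kernel is its transpose:

import numpy as np

def get_sobel_kernel(size):
    # same construction as in get_gradient_hv above
    rng = np.arange(-(size // 2), size // 2 + 1, dtype=np.float32)
    h, v = np.meshgrid(rng, rng)
    kernel_h = h / (h * h + v * v + 1.0e-15)
    kernel_v = v / (h * h + v * v + 1.0e-15)
    return kernel_h, kernel_v

kh, kv = get_sobel_kernel(5)
assert np.allclose(kh, -kh[:, ::-1])  # flipping columns negates the h-gradient
assert np.allclose(kh, kv.T)          # v-kernel is the transpose of the h-kernel
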
Esempio n. 39
    def build_graph(self, A, B):
        A = tf.transpose(A / 255.0, [0, 3, 1, 2])
        B = tf.transpose(B / 255.0, [0, 3, 1, 2])

        # use the torch initializers
        with argscope([Conv2D, Conv2DTranspose, FullyConnected],
                      kernel_initializer=tf.variance_scaling_initializer(scale=0.333, distribution='uniform'),
                      use_bias=False), \
                argscope(BatchNorm, gamma_init=tf.random_uniform_initializer()), \
                argscope([Conv2D, Conv2DTranspose, BatchNorm], data_format='NCHW'):
            with tf.variable_scope('gen'):
                with tf.variable_scope('B'):
                    AB = self.generator(A)
                with tf.variable_scope('A'):
                    BA = self.generator(B)
                    ABA = self.generator(AB)
                with tf.variable_scope('B'):
                    BAB = self.generator(BA)

            viz_A_recon = tf.concat([A, AB, ABA], axis=3, name='viz_A_recon')
            viz_B_recon = tf.concat([B, BA, BAB], axis=3, name='viz_B_recon')
            tf.summary.image('Arecon',
                             tf.transpose(viz_A_recon, [0, 2, 3, 1]),
                             max_outputs=50)
            tf.summary.image('Brecon',
                             tf.transpose(viz_B_recon, [0, 2, 3, 1]),
                             max_outputs=50)

            with tf.variable_scope('discrim'):
                with tf.variable_scope('A'):
                    A_dis_real, A_feats_real = self.discriminator(A)
                    A_dis_fake, A_feats_fake = self.discriminator(BA)

                with tf.variable_scope('B'):
                    B_dis_real, B_feats_real = self.discriminator(B)
                    B_dis_fake, B_feats_fake = self.discriminator(AB)

        with tf.name_scope('LossA'):
            # reconstruction loss
            recon_loss_A = tf.reduce_mean(tf.squared_difference(A, ABA),
                                          name='recon_loss')
            # gan loss
            self.build_losses(A_dis_real, A_dis_fake)
            G_loss_A = self.g_loss
            D_loss_A = self.d_loss
            # feature matching loss
            fm_loss_A = self.get_feature_match_loss(A_feats_real, A_feats_fake)

        with tf.name_scope('LossB'):
            recon_loss_B = tf.reduce_mean(tf.squared_difference(B, BAB),
                                          name='recon_loss')
            self.build_losses(B_dis_real, B_dis_fake)
            G_loss_B = self.g_loss
            D_loss_B = self.d_loss
            fm_loss_B = self.get_feature_match_loss(B_feats_real, B_feats_fake)

        global_step = get_global_step_var()
        rate = tf.train.piecewise_constant(global_step, [np.int64(10000)],
                                           [0.01, 0.5])
        rate = tf.identity(rate, name='rate')  # TF issue#8594
        g_loss = tf.add_n([((G_loss_A + G_loss_B) * 0.1 +
                            (fm_loss_A + fm_loss_B) * 0.9) * (1 - rate),
                           (recon_loss_A + recon_loss_B) * rate],
                          name='G_loss_total')
        d_loss = tf.add_n([D_loss_A, D_loss_B], name='D_loss_total')

        self.collect_variables('gen', 'discrim')
        # weight decay
        wd_g = regularize_cost('gen/.*/W',
                               l2_regularizer(1e-5),
                               name='G_regularize')
        wd_d = regularize_cost('discrim/.*/W',
                               l2_regularizer(1e-5),
                               name='D_regularize')

        self.g_loss = g_loss + wd_g
        self.d_loss = d_loss + wd_d

        add_moving_summary(recon_loss_A, recon_loss_B, rate, g_loss, d_loss,
                           wd_g, wd_d)
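
The `rate` schedule above implements a hand-off between loss terms: tf.train.piecewise_constant returns 0.01 while global_step <= 10000 and 0.5 afterwards, so early training is dominated by the adversarial and feature-matching terms and the reconstruction loss gains weight later. A minimal sketch (TF 1.x, standalone session) of the schedule in isolation:

import numpy as np
import tensorflow as tf

step = tf.placeholder(tf.int64, shape=[])
rate = tf.train.piecewise_constant(step, [np.int64(10000)], [0.01, 0.5])
with tf.Session() as sess:
    print(sess.run(rate, {step: 100}))     # 0.01 -> reconstruction nearly off
    print(sess.run(rate, {step: 20000}))   # 0.5  -> reconstruction at half weight
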
Esempio n. 40
def net(observations, config):
    # observation space = shape=(batch_size, episode_length, 10, 14, 2)
    # action space = shape=(batch, episode_length, 23)
    batch_size = tf.shape(observations)[0]
    episode_len = tf.shape(observations)[1]

    input_ = tf.reshape(
        observations,
        shape=[batch_size, episode_len, observations.shape.as_list()[2],
               functools.reduce(operator.mul, observations.shape.as_list()[3:], 1)])
    init_xavier_weights = tf.variance_scaling_initializer(
        scale=1.0, mode='fan_avg', distribution='uniform')
    init_output_weights = tf.variance_scaling_initializer(
        scale=config.init_output_factor, mode='fan_in', distribution='normal')
    # separate value and policy trunks
    with tf.variable_scope('o_trunk_policy'):
        conv1 = tf.layers.conv2d(
            inputs=input_,
            filters=128,
            kernel_size=[1, 3],
            padding='valid',
            activation=tf.nn.relu,
            kernel_initializer=init_xavier_weights
        )
        conv2 = tf.layers.conv2d(
            inputs=conv1,
            filters=128,
            kernel_size=[1, 3],
            padding='valid',
            activation=tf.nn.relu,
            kernel_initializer=init_xavier_weights,
        )
        flatten = tf.reshape(conv2, shape=[batch_size, episode_len, functools.reduce(
            operator.mul, conv2.shape.as_list()[2:], 1)])
        trunk_fc = tf.layers.dense(
            inputs=flatten,
            units=256,
            activation=tf.nn.relu,
            kernel_initializer=init_xavier_weights,
        )
        with tf.variable_scope('o_crown'):
            # offensive
            off_fc = tf.layers.dense(
                inputs=trunk_fc,
                units=128,
                activation=tf.nn.relu,
                kernel_initializer=init_xavier_weights,
            )
            with tf.variable_scope('actions'):
                off_action_mean = tf.layers.dense(
                    inputs=off_fc,
                    units=12,
                    activation=tf.tanh,  # NOTE: tanh may not be the best choice
                    kernel_initializer=init_output_weights,
                )
            with tf.variable_scope('decision'):
                logits = tf.layers.dense(
                    inputs=off_fc,
                    units=3,
                    activation=None,
                    kernel_initializer=init_output_weights,
                )

    with tf.variable_scope('o_trunk_value'):
        conv1 = tf.layers.conv2d(
            inputs=input_,
            filters=128,
            kernel_size=[1, 3],
            padding='valid',
            activation=tf.nn.relu,
            kernel_initializer=init_xavier_weights
        )
        conv2 = tf.layers.conv2d(
            inputs=conv1,
            filters=128,
            kernel_size=[1, 3],
            padding='valid',
            activation=tf.nn.relu,
            kernel_initializer=init_xavier_weights,
        )
        flatten = tf.reshape(conv2, shape=[batch_size, episode_len, functools.reduce(
            operator.mul, conv2.shape.as_list()[2:], 1)])
        trunk_fc = tf.layers.dense(
            inputs=flatten,
            units=256,
            activation=tf.nn.relu,
            kernel_initializer=init_xavier_weights,
        )
        with tf.variable_scope('o_crown'):
            # offensive
            off_fc = tf.layers.dense(
                inputs=trunk_fc,
                units=128,
                activation=tf.nn.relu,
                kernel_initializer=init_xavier_weights,
            )
            off_value = tf.layers.dense(
                inputs=off_fc,
                units=1,
                activation=None,
                kernel_initializer=init_output_weights,
            )
            off_value = tf.reshape(
                off_value, shape=[batch_size, episode_len])
            off_value = tf.check_numerics(off_value, 'off_value')

    # with tf.variable_scope('o_trunk'):
    #     conv1 = tf.layers.conv2d(
    #         inputs=input_,
    #         filters=64,
    #         kernel_size=[1, 3],
    #         padding='same',
    #         activation=tf.nn.relu,
    #         kernel_initializer=init_xavier_weights
    #     )
    #     conv2 = tf.layers.conv2d(
    #         inputs=conv1,
    #         filters=64,
    #         kernel_size=[1, 3],
    #         padding='same',
    #         activation=tf.nn.relu,
    #         kernel_initializer=init_xavier_weights,
    #     )
    #     flatten = tf.reshape(conv2, shape=[batch_size, episode_len, functools.reduce(
    #         operator.mul, conv2.shape.as_list()[2:], 1)])
    #     trunk_fc = tf.layers.dense(
    #         inputs=flatten,
    #         units=128,
    #         activation=tf.nn.relu,
    #         kernel_initializer=init_xavier_weights,
    #     )
    #     with tf.variable_scope('o_crown'):
    #         # offensive
    #         off_fc = tf.layers.dense(
    #             inputs=trunk_fc,
    #             units=64,
    #             activation=tf.nn.relu,
    #             kernel_initializer=init_xavier_weights,
    #         )
    #         with tf.variable_scope('policy'):
    #             with tf.variable_scope('actions'):
    #                 off_action_mean = tf.layers.dense(
    #                     inputs=off_fc,
    #                     units=12,
    #                     activation=tf.tanh,  # NOTE tanh is not good?
    #                     kernel_initializer=init_output_weights,
    #                 )
    #             with tf.variable_scope('decision'):
    #                 logits = tf.layers.dense(
    #                     inputs=off_fc,
    #                     units=3,
    #                     activation=None,
    #                     kernel_initializer=init_output_weights,
    #                 )
    #         with tf.variable_scope('value'):
    #             off_value = tf.layers.dense(
    #                 inputs=off_fc,
    #                 units=1,
    #                 activation=None,
    #                 kernel_initializer=init_output_weights,
    #             )
    #             off_value = tf.reshape(
    #                 off_value, shape=[batch_size, episode_len])
    #             off_value = tf.check_numerics(off_value, 'off_value')

    with tf.variable_scope('d_trunk'):
        conv1 = tf.layers.conv2d(
            inputs=input_,
            filters=64,
            kernel_size=[1, 3],
            padding='same',
            activation=tf.nn.relu,
            kernel_initializer=init_xavier_weights,
        )
        conv2 = tf.layers.conv2d(
            inputs=conv1,
            filters=64,
            kernel_size=[1, 3],
            padding='same',
            activation=tf.nn.relu,
            kernel_initializer=init_xavier_weights,
        )
        flatten = tf.reshape(conv2, shape=[batch_size, episode_len, functools.reduce(
            operator.mul, conv2.shape.as_list()[2:], 1)])
        trunk_fc = tf.layers.dense(
            inputs=flatten,
            units=128,
            activation=tf.nn.relu,
            kernel_initializer=init_xavier_weights,
        )
        with tf.variable_scope('d_crown'):
            # defensive
            def_fc = tf.layers.dense(
                inputs=trunk_fc,
                units=64,
                activation=tf.nn.relu,
                kernel_initializer=init_xavier_weights,
            )
            with tf.variable_scope('policy'):
                with tf.variable_scope('actions'):
                    def_action_mean = tf.layers.dense(
                        inputs=def_fc,
                        units=10,
                        activation=tf.tanh,  # NOTE: tanh may not be the best choice
                        kernel_initializer=init_output_weights,
                    )
            with tf.variable_scope('value'):
                def_value = tf.layers.dense(
                    inputs=def_fc,
                    units=1,
                    activation=None,
                    kernel_initializer=init_output_weights,
                )
                def_value = tf.reshape(
                    def_value, shape=[batch_size, episode_len])
                def_value = tf.check_numerics(def_value, 'def_value')

    return logits, off_action_mean, off_value, def_action_mean, def_value
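
The two initializers above map onto familiar schemes: scale=1.0 with mode='fan_avg' and a uniform distribution is exactly Glorot/Xavier uniform, while the output layers use normal fan-in scaling with a configurable gain. A short equivalence note (TF 1.x):

import tensorflow as tf

init_xavier_weights = tf.variance_scaling_initializer(
    scale=1.0, mode='fan_avg', distribution='uniform')
# equivalent to:
glorot = tf.glorot_uniform_initializer()
# both draw from U(-limit, limit) with limit = sqrt(6 / (fan_in + fan_out))
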
Esempio n. 41
 def _initializer(self, params):
     return tf.variance_scaling_initializer(mode="fan_avg",
                                            distribution="uniform",
                                            dtype=self.dtype)
Esempio n. 42
def train_my_lstm():
    # Data
    train_size = 10000
    test_size = 100
    batch_size = 100
    time_steps = 5
    num_features = 1

    # problem_type = 'regression'
    # X_train, y_train = sin_data(train_size, time_steps=time_steps)
    # X_test, y_test = sin_data(test_size, time_steps=time_steps)

    problem_type = 'binary_classification'
    X_train, y_train = binary_data(train_size, time_steps=time_steps)
    X_test, y_test = binary_data(test_size, time_steps=time_steps)

    # Placeholders. Do NOT use None for batch_size
    inputs = tf.placeholder(tf.float32,
                            shape=[batch_size, time_steps, num_features])
    truth = tf.placeholder(tf.float32, shape=[batch_size, 1])

    initializer = tf.variance_scaling_initializer(scale=2.0)

    # Network structure: 3D LSTM - Dense(1)
    X = my_3d_lstm(inputs)
    X = tf.layers.flatten(X)
    outputs = tf.layers.dense(X, 1, kernel_initializer=initializer)

    if problem_type == 'binary_classification':
        loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=truth,
                                                       logits=outputs)
        loss = tf.reduce_mean(loss)
    elif problem_type == 'regression':
        loss = tf.nn.l2_loss(truth - outputs) / batch_size

    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    optimizer = optimizer.minimize(loss)

    # Initialize and run the graph
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    for epoch_index in range(20):
        for batch_index in range(train_size // batch_size):
            X_train_batch = X_train[batch_index *
                                    batch_size:(batch_index + 1) * batch_size]
            y_train_batch = y_train[batch_index *
                                    batch_size:(batch_index + 1) * batch_size]

            results = sess.run(optimizer,
                               feed_dict={
                                   inputs: X_train_batch,
                                   truth: y_train_batch
                               })
            if (batch_index % 10 == 0):
                results_train_batch = sess.run([outputs, loss],
                                               feed_dict={
                                                   inputs: X_train_batch,
                                                   truth: y_train_batch
                                               })
                results_test = sess.run([outputs, loss],
                                        feed_dict={
                                            inputs: X_test,
                                            truth: y_test
                                        })
                print('------------------------------')
                print('Epoch %d. Batch %d.' % (epoch_index, batch_index))
                print('Train accuracy: %f. Loss: %f.' %
                      (np.mean((results_train_batch[0] > 0) == y_train_batch),
                       results_train_batch[1]))
                # print('Train accuracy: %f. Loss: %f.' % (np.mean(np.abs(results_train_batch[0]-y_train_batch)), results_train_batch[1]))
                print('Test accuracy: %f. Loss: %f.' % (np.mean(
                    (results_test[0] > 0) == y_test), results_test[1]))
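
`binary_data` (like `sin_data` and `my_3d_lstm`) is not shown in this snippet. A hypothetical stand-in, only so the training loop above can be exercised, that labels a sequence 1 when its values sum to a positive number:

import numpy as np

def binary_data(n, time_steps=5):
    # hypothetical helper: random sequences, label = 1 if the sequence sum > 0
    X = np.random.randn(n, time_steps, 1).astype(np.float32)
    y = (X.sum(axis=(1, 2)) > 0).astype(np.float32).reshape(-1, 1)
    return X, y
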
Esempio n. 43
    def __build(self):
        with self._graph.as_default():
            with tf.variable_scope("inputs"):
                # input arguments
                self._features = tf.placeholder(tf.float32, shape=[None, self.t_bins, self.f_bins, 2*self.n_mic])
                self._targets  = tf.placeholder(tf.float32, shape=[None, self.t_bins, self.f_bins, 2*self.n_src])
                global_step    = tf.Variable(1, trainable=False)
                
                # preprocessing
                feature_magnitude = self._features[..., :self.n_mic]
                target_magnitude  = self._targets[..., :self.n_src]
                feature_phase     = self._features[..., self.n_mic:]
                target_phase      = self._targets[..., self.n_src:]
                phase_difference  = target_phase - self._features[..., self.n_mic:self.n_mic+1]
                target_real       = target_magnitude * tf.cos(phase_difference)
                target_image      = target_magnitude * tf.sin(phase_difference)
            
            with tf.variable_scope("cnn", initializer=tf.keras.initializers.Orthogonal(gain=1.0),
                                          regularizer=tf.contrib.layers.l2_regularizer(scale=1e-6)):
                conv = tf.layers.conv2d(feature_magnitude, 64, (3, 3), (1, 1), 
                                        "same", activation=tf.nn.relu)
                conv = tf.layers.max_pooling2d(conv, [1, 4], [1, 4], "valid")
                conv = tf.layers.conv2d(conv, 64, (3, 3), (1, 1), 
                                        "same", activation=tf.nn.relu)
                conv = tf.layers.max_pooling2d(conv, [1, 2], [1, 2], "valid")
                conv = tf.layers.conv2d(conv, 64, (3, 3), (1, 1), 
                                        "same", activation=tf.nn.relu)
                conv = tf.layers.max_pooling2d(conv, [1, 2], [1, 2], "valid")
                conv = tf.reshape(conv, (-1, self.t_bins, 64*32))
                conv = tf.unstack(conv, axis=1)
            
            with tf.variable_scope("rnn", initializer=tf.variance_scaling_initializer(), 
                                          regularizer=tf.contrib.layers.l2_regularizer(scale=1e-6)):
                # cells formation
                cells_forward  = []
                cells_backward = []
                for i in range(3):
                    cell = tf.nn.rnn_cell.GRUCell(num_units=1024)
                    cells_forward.append(cell)
                    cell = tf.nn.rnn_cell.GRUCell(num_units=1024)
                    cells_backward.append(cell)

                # rnn formation
                rnn_forward  = tf.nn.rnn_cell.MultiRNNCell(cells=cells_forward)
                rnn_backward = tf.nn.rnn_cell.MultiRNNCell(cells=cells_backward)
                rnn, _, _ = tf.nn.static_bidirectional_rnn(rnn_forward, rnn_backward, conv, dtype=tf.float32)
                rnn = tf.stack(rnn, axis=1)
                rnn = tf.reshape(rnn, [-1, self.t_bins, 2048])
            
            with tf.variable_scope("fnn", initializer=tf.variance_scaling_initializer(), 
                                          regularizer=tf.contrib.layers.l2_regularizer(scale=1e-6)):
                fnn = tf.layers.dense(rnn, units=self.f_bins*self.n_src)
                fnn = tf.nn.relu(fnn)
            
            with tf.variable_scope("mask", initializer=tf.keras.initializers.Orthogonal(gain=1.0),
                                           regularizer=tf.contrib.layers.l2_regularizer(1e-6)):
                # mask for real part
                mask_real = tf.layers.dense(fnn, units=self.f_bins*self.n_src)
                mask_real = tf.reshape(mask_real, [-1, self.t_bins, self.f_bins, self.n_src])
                # mask_rv = 1 - tf.reduce_sum(mask_re, axis=-1, keepdims=True)
                # self._mask_re = tf.concat([mask_re, mask_rv], axis=-1)

                # mask for imag part
                mask_image = tf.layers.dense(fnn, units=self.f_bins*self.n_src)
                mask_image = tf.reshape(mask_image, [-1, self.t_bins, self.f_bins, self.n_src])
                # mask_iv = 1 - tf.reduce_sum(mask_im, axis=-1, keepdims=True)
                # self._mask_im = tf.concat([mask_im, mask_iv], axis=-1)

            with tf.variable_scope("outputs"):
                # logits layer
                logits_real  = mask_real  * feature_magnitude[..., :1]
                logits_image = mask_image * feature_magnitude[..., :1]
                self._logits = tf.concat((logits_real, logits_image), axis=-1)
                # logit_re     = tf.concat((logits_real, mask_rv * ftr_mgt[..., :1]), axis=-1)
                # logit_im     = tf.concat((logit_im, mask_iv * ftr_mgt[..., :1]), axis=-1)
                
                # regression: MSE & L2-regularization & permutational loss
                self._loss = DTLoss(target_real, logits_real, self.n_src) + \
                             DTLoss(target_image, logits_image, self.n_src) + \
                             tf.losses.get_regularization_loss()
                
                # backward
                '''
                lr = tf.train.exponential_decay(
                    learning_rate=1e-3,
                    global_step=global_step,
                    decay_steps=self.decay_step,
                    decay_rate=0.1,
                    staircase=True
                )
                '''
                lr = 1e-3
                optimizer     = tf.train.AdamOptimizer(lr)
                self.minimize = optimizer.minimize(loss=self._loss)

                # operation
                self._session = tf.Session()
                self._session.run(tf.global_variables_initializer())
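
The cnn and mask scopes above use orthogonal initialization, which returns matrices with orthonormal columns and so preserves activation norms on the first forward pass. A quick NumPy sketch of how such a matrix is typically built (QR decomposition of a Gaussian matrix) and checked:

import numpy as np

w, _ = np.linalg.qr(np.random.randn(64, 64))        # orthogonal factor of a Gaussian matrix
assert np.allclose(w.T @ w, np.eye(64), atol=1e-6)  # columns are orthonormal
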
Esempio n. 44
    def cnn(self):
        with tf.name_scope('embedding_layer'):
            embeddings = self.random_embedding(self.config.vocab_size,
                                               self.config.embedding_dim)
            embedding = tf.Variable(embeddings,
                                    dtype=tf.float32,
                                    trainable=True,
                                    name='word_embedding')
            embeddings_inputs = tf.nn.embedding_lookup(embedding, self.content)
            # embeddings_inputs[batch_size, sequence_length, embedding_dim]
            # the input must be expanded to a 4-D tensor whose last dimension is the depth
            # embeddings_inputs_expanded[batch_size, sequence_length, embedding_dim, input_depth]
            self.embeddings_inputs_expanded = tf.expand_dims(
                embeddings_inputs, -1)

        with tf.name_scope('cnn_layer'):
            # shape = [kernel_height, kernel_width, input_depth, output_depth]
            filter_weights = tf.get_variable(
                name='weights',
                shape=[
                    self.config.kernel_size, self.config.embedding_dim, 1,
                    self.config.num_filters
                ],
                initializer=tf.variance_scaling_initializer(),
                dtype=tf.float32)
            biases = tf.get_variable(name='biases',
                                     shape=[self.config.num_filters],
                                     initializer=tf.zeros_initializer(),
                                     dtype=tf.float32)
            '''
            strides=[batch_stride, height_stride, width_stride, depth_stride]; the first and
            fourth entries must be 1, since the convolution stride only applies to the height
            and width of the input.
            padding  VALID: no padding;  SAME: zero padding.
            The padding scheme determines the output size:
            with zero padding  output_height=ceil(input_height/stride_height)  output_width=ceil(input_width/stride_width)
            no padding         output_height=ceil((input_height-filter_height+1)/stride_height)  output_width=ceil((input_width-filter_width+1)/stride_width)
            '''
            conv = tf.nn.conv2d(self.embeddings_inputs_expanded,
                                filter_weights,
                                strides=[1, 1, 1, 1],
                                padding='VALID')
            conv = tf.nn.relu(tf.nn.bias_add(conv, biases), name='conv')

            # ksize=[batch_kernel_size, height_kernel, width_kernel, depth_kernel_size]; the first and fourth entries are usually 1.
            # The pooling window is set to the full post-convolution height and width, so the pooled result has
            # size 1 in the second and third dimensions, which simplifies the computation below.
            pool = tf.nn.max_pool(conv,
                                  ksize=[
                                      1, self.config.seq_length -
                                      self.config.kernel_size + 1, 1, 1
                                  ],
                                  strides=[1, 1, 1, 1],
                                  padding='VALID',
                                  name='pool')

            # Reshape the pooled result into a 2-D matrix; since the second and third dimensions are already 1, the final shape is [batch_size, num_filters]
            h = tf.reshape(pool, [-1, self.config.num_filters])
            w = tf.get_variable(
                name='w',
                shape=[self.config.num_filters, self.config.num_classes],
                initializer=tf.contrib.layers.xavier_initializer(),
                dtype=tf.float32)
            b = tf.get_variable(name='b',
                                shape=[self.config.num_classes],
                                initializer=tf.zeros_initializer(),
                                dtype=tf.float32)
            fc = tf.matmul(h, w) + b
            self.logits = tf.nn.dropout(fc, self.config.dropout_prob)

        with tf.name_scope('optimize_layer'):
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits,
                                                           labels=self.label))

            self.optimizer = tf.train.AdamOptimizer(
                self.config.learning_rate).minimize(self.loss)

        with tf.name_scope('score'):
            self.predict_label = tf.argmax(self.logits, 1)
            correct_pred = tf.equal(tf.argmax(self.logits, 1),
                                    tf.argmax(self.label, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
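
The shape comments above can be verified with a little arithmetic: a VALID convolution over the sequence, followed by a VALID max-pool whose window spans all remaining positions, leaves exactly one step, so the reshape to [batch_size, num_filters] loses nothing. With illustrative (hypothetical) config values:

seq_length, kernel_size, num_filters = 100, 5, 128
conv_height = seq_length - kernel_size + 1    # 96 positions after the VALID conv
pool_window = seq_length - kernel_size + 1    # the max_pool ksize from the code above
pool_height = conv_height - pool_window + 1   # = 1, so the pooling is global over time
# pooled shape: [batch_size, 1, 1, num_filters] -> reshape to [batch_size, num_filters]
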
Esempio n. 45
            pool5 = tf.layers.dropout(pool5, rate=pooldropout_rate, seed=115, training=training)

    # Flatten output
    with tf.name_scope('flatten') as scope:
        flat_output = tf.contrib.layers.flatten(pool5)

        # dropout at fc rate
        flat_output = tf.layers.dropout(flat_output, rate=fcdropout_rate, seed=116, training=training)

    # Fully connected layer 1
    with tf.name_scope('fc1') as scope:
        fc1 = tf.layers.dense(
            flat_output,
            2048,
            activation=None,
            kernel_initializer=tf.variance_scaling_initializer(scale=2, seed=117),
            bias_initializer=tf.zeros_initializer(),
            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=lamF),
            name="fc1"
        )

        bn_fc1 = tf.layers.batch_normalization(
            fc1,
            axis=-1,
            momentum=0.9,
            epsilon=epsilon,
            center=True,
            scale=True,
            beta_initializer=tf.zeros_initializer(),
            gamma_initializer=tf.ones_initializer(),
            moving_mean_initializer=tf.zeros_initializer(),
Esempio n. 46
    def build_predict(self,
                      inputs,
                      reverse_preds=None,
                      embed_penultimate=False,
                      target_subset=None,
                      save_reprs=False):
        """Construct per-location real-valued predictions."""
        assert inputs is not None
        print('Targets pooled by %d to length %d' %
              (self.hp.target_pool, self.hp.seq_length // self.hp.target_pool))

        if self.hp.augment_mutation > 0:
            # sample mutation binary mask across sequences
            mut_mask_probs = self.hp.augment_mutation * np.ones(
                (self.hp.seq_length, 1))
            mut_mask_dist = tfp.distributions.Bernoulli(probs=mut_mask_probs,
                                                        dtype=tf.float32)
            mut_mask = mut_mask_dist.sample(tf.shape(inputs)[0])

            # sample random nucleotide for mutations
            mut_1hot_probs = 0.25 * np.ones((self.hp.seq_length, 4))
            mut_1hot_dist = tfp.distributions.OneHotCategorical(
                probs=mut_1hot_probs, dtype=tf.float32)
            mut_1hot = mut_1hot_dist.sample(tf.shape(inputs)[0])

            # modify sequence
            inputs_mut = inputs - mut_mask * inputs + mut_mask * mut_1hot
            inputs = tf.cond(self.is_training, lambda: inputs_mut,
                             lambda: inputs)

        ###################################################
        # convolution layers
        ###################################################
        filter_weights = []
        layer_reprs = [inputs]

        seqs_repr = inputs
        for layer_index in range(self.hp.cnn_layers):
            with tf.variable_scope('cnn%d' % layer_index, reuse=tf.AUTO_REUSE):
                # convolution block
                args_for_block = self._make_conv_block_args(
                    layer_index, layer_reprs)
                seqs_repr = layers.conv_block(seqs_repr=seqs_repr,
                                              **args_for_block)

                # save representation
                layer_reprs.append(seqs_repr)

        if save_reprs:
            self.layer_reprs = layer_reprs

        # final nonlinearity
        if self.hp.nonlinearity == 'relu':
            seqs_repr = tf.nn.relu(seqs_repr)
        elif self.hp.nonlinearity == 'gelu':
            seqs_repr = tf.nn.sigmoid(1.702 * seqs_repr) * seqs_repr
        else:
            print('Unrecognized nonlinearity "%s"' % self.hp.nonlinearity,
                  file=sys.stderr)
            exit(1)

        ###################################################
        # slice out side buffer
        ###################################################

        # update batch buffer to reflect pooling
        seq_length = seqs_repr.shape[1].value
        pool_preds = self.hp.seq_length // seq_length
        assert self.hp.batch_buffer % pool_preds == 0, (
            'batch_buffer %d not divisible'
            ' by the CNN pooling %d') % (self.hp.batch_buffer, pool_preds)
        batch_buffer_pool = self.hp.batch_buffer // pool_preds

        # slice out buffer
        seq_length = seqs_repr.shape[1]
        seqs_repr = seqs_repr[:, batch_buffer_pool:seq_length -
                              batch_buffer_pool, :]
        seq_length = seqs_repr.shape[1]

        ###################################################
        # final layer
        ###################################################
        if embed_penultimate:
            final_repr = seqs_repr
        else:
            with tf.variable_scope('final', reuse=tf.AUTO_REUSE):
                final_filters = self.hp.sum_targets * self.hp.target_classes
                final_repr = tf.layers.dense(
                    inputs=seqs_repr,
                    units=final_filters,
                    activation=None,
                    kernel_initializer=tf.variance_scaling_initializer(
                        scale=2.0, mode='fan_in'),
                    kernel_regularizer=tf.contrib.layers.l1_regularizer(
                        self.hp.final_l1_scale))
                print('Convolution w/ %d %dx1 filters to final targets' % \
                      (final_filters, seqs_repr.shape[2]))

                if target_subset is not None:
                    # get convolution parameters
                    filters_full = tf.get_collection(
                        tf.GraphKeys.GLOBAL_VARIABLES, 'final/dense/kernel')[0]
                    bias_full = tf.get_collection(
                        tf.GraphKeys.GLOBAL_VARIABLES, 'final/dense/bias')[0]

                    # subset to specific targets
                    filters_subset = tf.gather(filters_full,
                                               target_subset,
                                               axis=1)
                    bias_subset = tf.gather(bias_full, target_subset, axis=0)

                    # substitute a new limited convolution
                    final_repr = tf.tensordot(seqs_repr, filters_subset, 1)
                    final_repr = tf.nn.bias_add(final_repr, bias_subset)

                    # update # targets
                    self.hp.sum_targets = len(target_subset)

                # expand length back out
                if self.hp.target_classes > 1:
                    final_repr = tf.reshape(
                        final_repr, (-1, seq_length, self.hp.sum_targets,
                                     self.hp.target_classes))

        # transform for reverse complement
        if reverse_preds is not None:
            final_repr = tf.cond(reverse_preds,
                                 lambda: tf.reverse(final_repr, axis=[1]),
                                 lambda: final_repr)

        ###################################################
        # link function
        ###################################################
        if embed_penultimate:
            predictions = final_repr
        else:
            # work-around for specifying my own predictions
            # self.preds_adhoc = tf.placeholder(
            #     tf.float32, shape=final_repr.shape, name='preds-adhoc')

            # float 32 exponential clip max
            exp_max = 50

            # choose link
            if self.hp.link in ['identity', 'linear']:
                predictions = tf.identity(final_repr, name='preds')

            elif self.hp.link == 'relu':
                predictions = tf.nn.relu(final_repr, name='preds')

            elif self.hp.link == 'exp':
                final_repr_clip = tf.clip_by_value(final_repr, -exp_max,
                                                   exp_max)
                predictions = tf.exp(final_repr_clip, name='preds')

            elif self.hp.link == 'exp_linear':
                predictions = tf.where(
                    final_repr > 0,
                    final_repr + 1,
                    tf.exp(tf.clip_by_value(final_repr, -exp_max, exp_max)),
                    name='preds')

            elif self.hp.link == 'softplus':
                final_repr_clip = tf.clip_by_value(final_repr, -exp_max, 10000)
                predictions = tf.nn.softplus(final_repr_clip, name='preds')

            else:
                print('Unknown link function %s' % self.hp.link,
                      file=sys.stderr)
                exit(1)

            # clip
            if self.hp.target_clip is not None:
                predictions = tf.clip_by_value(predictions, 0,
                                               self.hp.target_clip)

            # sqrt
            if self.hp.target_sqrt:
                predictions = tf.sqrt(predictions)

        return predictions
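
Among the link functions above, 'exp_linear' is the only piecewise one; it is continuous at zero because exp(0) = 1 = 0 + 1. A tiny NumPy sketch of the same function, outside the graph:

import numpy as np

def exp_linear(x, exp_max=50):
    # mirrors the tf.where branch above: x + 1 for x > 0, exp(x) otherwise
    return np.where(x > 0, x + 1, np.exp(np.clip(x, -exp_max, exp_max)))

print(exp_linear(np.array([-1.0, 0.0, 2.0])))  # [0.368 1.    3.   ]
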
Esempio n. 47
def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format):
    if strides > 1:
        inputs = fixed_padding(inputs, kernel_size, data_format)
    # variance_scaling_initializer by default draws from N(0, 1/sqrt(n));
    # for a kernel of shape (k, k, f_i, f_o), n = k * k * f_i
    return tf.layers.conv2d(
        inputs=inputs, filters=filters, kernel_size=kernel_size,
        strides=strides, padding=('SAME' if strides == 1 else 'VALID'),
        use_bias=False,
        kernel_initializer=tf.variance_scaling_initializer(),
        data_format=data_format)
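
`fixed_padding` is not shown in this snippet. In the reference ResNet code it pads explicitly, independent of input size, so that the subsequent strided convolution can use VALID padding; a sketch under that assumption:

def fixed_padding(inputs, kernel_size, data_format):
    # pad height and width by kernel_size - 1 in total, split front/back
    pad_total = kernel_size - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    if data_format == 'channels_first':
        return tf.pad(inputs, [[0, 0], [0, 0],
                               [pad_beg, pad_end], [pad_beg, pad_end]])
    return tf.pad(inputs, [[0, 0], [pad_beg, pad_end],
                           [pad_beg, pad_end], [0, 0]])
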
Esempio n. 48
    def _get_network(self, inputs, use_histograms=False):
        screen = pysc2_common_net_funcs.preprocess_state_input(
            inputs, self._config)

        with tf.variable_scope('shared_spatial_network'):
            shared_spatial_net = network_utils.get_layers(
                screen,
                self._config['network_structure']['shared_spatial_network'],
                self._config['network_structure']['default_activation'],
                self._training,
                use_histograms=use_histograms)

        if self._config['network_structure'][
                'scale_gradients_at_shared_spatial_split']:
            with tf.variable_scope('spatial_gradient_scale'):
                # scale because multiple action component streams are meeting here
                # (always one more branch than number of spatial components)
                spatial_count = 1
                for name, using in self._action_components.items():
                    if using and name in pysc2_common_net_funcs.spatial_components:
                        spatial_count += 1
                scale = 1 / spatial_count
                shared_spatial_net = (1 - scale) * tf.stop_gradient(
                    shared_spatial_net) + scale * shared_spatial_net

        if self._config['dueling_network']:
            with tf.variable_scope('dueling_gradient_scale'):
                # scale the gradients entering last shared layer, as in original Dueling DQN paper
                scale = 1 / math.sqrt(2)
                shared_spatial_net = (1 - scale) * tf.stop_gradient(
                    shared_spatial_net) + scale * shared_spatial_net

        # for dueling net, split here
        if self._config['dueling_network']:
            with tf.variable_scope('value_network'):
                fc_value = network_utils.get_layers(
                    shared_spatial_net,
                    self._config['network_structure']['value_network'],
                    self._config['network_structure']['default_activation'],
                    self._training,
                    use_histograms=use_histograms)
                value = tf.layers.dense(
                    fc_value,
                    1,
                    activation=None,
                    kernel_initializer=tf.variance_scaling_initializer(
                        scale=2.0),
                    name='value')
        else:
            # returning this from the function for debugging purposes, so need it to exist if not using dueling net
            value = None

        with tf.variable_scope('shared_non_spatial_network'):
            shared_non_spatial = network_utils.get_layers(
                shared_spatial_net,
                self._config['network_structure']
                ['shared_non_spatial_network'],
                self._config['network_structure']['default_activation'],
                self._training,
                use_histograms=use_histograms)

        if self._config['network_structure'][
                'scale_gradients_at_shared_non_spatial_split']:
            with tf.variable_scope('non_spatial_gradient_scale'):
                # scale because multiple action component streams are meeting here
                non_spatial_count = 0
                for name, using in self._action_components.items():
                    if using and name not in pysc2_common_net_funcs.spatial_components:
                        non_spatial_count += 1
                scale = 1 / non_spatial_count
                shared_non_spatial = (1 - scale) * tf.stop_gradient(
                    shared_non_spatial) + scale * shared_non_spatial

        num_options = pysc2_common_net_funcs.get_num_options_per_function(
            self._config)

        # create each component stream
        component_streams = {}
        # final q vals with value added
        action_q_vals = {}
        # if another stream requires the output of another stream
        component_one_hots_or_embeddings = {}
        for c in pysc2_common_net_funcs.component_order:
            # are we using this component?
            if self._action_components[c]:
                with tf.variable_scope(c + '_stream'):
                    stream_input = shared_non_spatial
                    if c in pysc2_common_net_funcs.spatial_components:
                        stream_input = shared_spatial_net

                    # optionally one stream of fully connected layers per component
                    spec = self._config['network_structure'][
                        'component_stream_default']
                    if c in self._config['network_structure'][
                            'component_stream_specs']:
                        spec = self._config['network_structure'][
                            'component_stream_specs'][c]

                    # optionally feed one hot OR embedded versions of earlier stream outputs to this stream
                    dependencies = None
                    if self._config['network_structure'][
                            'use_stream_outputs_as_inputs_to_other_streams']:
                        if c in self._config['network_structure'][
                                'stream_dependencies']:
                            dependencies = []
                            for d in self._config['network_structure'][
                                    'stream_dependencies'][c]:
                                dependencies.append(
                                    component_one_hots_or_embeddings[d])

                    component_stream = network_utils.get_layers(
                        stream_input,
                        spec,
                        self._config['network_structure']
                        ['default_activation'],
                        self._training,
                        dependencies,
                        use_histograms=use_histograms)

                    if c not in pysc2_common_net_funcs.spatial_components or self._config[
                            'network_structure'][
                                'end_spatial_streams_with_dense_instead_of_flatten']:
                        # make a dense layer with width equal to number of possible actions
                        dense = tf.layers.Dense(num_options[c], name=c)
                        component_streams[c] = dense(component_stream)
                        if self._use_histograms:
                            weights = dense.kernel
                            bias = dense.bias
                            name = 'final_dense_' + c + '_'
                            tf.summary.histogram(name + 'weights', weights)
                            tf.summary.histogram(name + 'bias', bias)
                    else:
                        # flatten a conv output
                        component_streams[c] = tf.reshape(component_stream,
                                                          [-1, num_options[c]],
                                                          name=c)
                if self._use_histograms:
                    tf.summary.histogram('advantage_' + c,
                                         component_streams[c])
                if self._config['dueling_network']:
                    # action_q_vals is A(s,a), value is V(s)
                    # Q(s,a) = V(s) + A(s,a) - 1/|A| * SUM_a(A(s,a))
                    with tf.variable_scope('q_vals'):
                        advantage = component_streams[c]
                        action_q_vals[c] = tf.add(
                            value,
                            (advantage -
                             tf.reduce_mean(advantage, axis=1, keepdims=True)),
                            name=c)

                else:
                    action_q_vals[c] = component_streams[c]

                # filter out actions ('function') that are illegal for this state
                if c == 'function':
                    with tf.variable_scope('available_actions_mask'):
                        # available actions mask; avoids using negative infinity, and is the right size
                        action_neg_inf_q_vals = action_q_vals[
                            'function'] * 0 - 1000000
                        action_q_vals['function'] = tf.where(
                            inputs['available_actions'],
                            action_q_vals['function'], action_neg_inf_q_vals)

                if self._config['network_structure'][
                        'use_stream_outputs_as_inputs_to_other_streams']:
                    with tf.variable_scope('stream_action_one_hot'):
                        found_dependency = False
                        for stream, dependencies in self._config[
                                'network_structure'][
                                    'stream_dependencies'].items():
                            if self._action_components[
                                    stream] and c in dependencies:
                                found_dependency = True
                                break
                        if found_dependency:

                            action_index = tf.math.argmax(action_q_vals[c],
                                                          axis=-1)
                            if c == 'screen':
                                # special handling for screen->screen2 only
                                action_one_hot = tf.one_hot(
                                    action_index, num_options[c])
                                action_one_hot = tf.reshape(
                                    action_one_hot, [
                                        -1, self._config['env']['screen_size'],
                                        self._config['env']['screen_size'], 1
                                    ])
                                component_one_hots_or_embeddings[
                                    c] = tf.stop_gradient(action_one_hot)
                            elif num_options[c] <= 10:
                                action_one_hot = tf.one_hot(
                                    action_index, num_options[c])
                                # argmax is already non-differentiable; stop_gradient makes that intent explicit
                                component_one_hots_or_embeddings[
                                    c] = tf.stop_gradient(action_one_hot)
                            else:
                                component_one_hots_or_embeddings[
                                    c] = tf.keras.layers.Embedding(
                                        input_dim=num_options[c],
                                        output_dim=math.ceil(num_options[c]**(
                                            1 / 4.0)))(action_index)

        # return action_q_vals
        return action_q_vals, value, component_streams
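
The dueling aggregation commented above, Q(s,a) = V(s) + A(s,a) - 1/|A| * SUM_a(A(s,a)), can be checked with toy numbers:

import numpy as np

value = 1.0
advantage = np.array([2.0, 0.0, -2.0])
q = value + (advantage - advantage.mean())  # -> [ 3.  1. -1.]
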
Esempio n. 49
    def __call__(self, inputs, training):

        inputs = tf.identity(inputs, 'model_inputs')
        print('===================== model inputs', inputs)

        with self._model_variable_scope():

            # init conv
            if self.bottleneck:
                init_channel_num = self.k * 2
            else:
                init_channel_num = 16
            inputs = tf.layers.conv2d(
                inputs=inputs,
                filters=init_channel_num,
                kernel_size=3,
                strides=1,
                padding='SAME',
                use_bias=False,
                kernel_initializer=tf.variance_scaling_initializer(),
                name='init_conv')

            if not self.bottleneck:
                with tf.variable_scope('stage1'):
                    for block_num in range(self.N):
                        inputs = _add_layer(self, inputs, block_num, training)
                    inputs = _add_transition(self, inputs, 'transition1',
                                             training)

                with tf.variable_scope('stage2'):
                    for block_num in range(self.N):
                        inputs = _add_layer(self, inputs, block_num, training)
                    inputs = _add_transition(self, inputs, 'transition2',
                                             training)

                with tf.variable_scope('stage3'):
                    for block_num in range(self.N):
                        inputs = _add_layer(self, inputs, block_num, training)

            if self.bottleneck:
                with tf.variable_scope('stage1'):
                    for block_num in range(self.N):
                        inputs = _add_bottleneck_layer(self, inputs, block_num,
                                                       training)
                    inputs = _add_transition(self, inputs, 'transition1',
                                             training)

                with tf.variable_scope('stage2'):
                    for block_num in range(self.N):
                        inputs = _add_bottleneck_layer(self, inputs, block_num,
                                                       training)
                    inputs = _add_transition(self, inputs, 'transition2',
                                             training)

                with tf.variable_scope('stage3'):
                    for block_num in range(self.N):
                        inputs = _add_bottleneck_layer(self, inputs, block_num,
                                                       training)

            inputs = batch_norm(inputs, training, name='bnlast')
            inputs = tf.nn.relu(inputs)

            # global avg pooling
            inputs = tf.reduce_mean(inputs, [1, 2],
                                    keepdims=True,
                                    name='final_reduce_mean')
            inputs = tf.squeeze(inputs, [1, 2])

            inputs = tf.layers.dense(inputs=inputs, units=self.num_classes)

            inputs = tf.identity(inputs, 'final_dense')
            print('===================== model outputs', inputs)

            return inputs
Esempio n. 50
 def conv(self,
          num_out_channels,
          k_height,
          k_width,
          d_height=1,
          d_width=1,
          mode='SAME',
          input_layer=None,
          num_channels_in=None,
          use_batch_norm=None,
          stddev=None,
          activation='relu',
          bias=0.0,
          kernel_initializer=None):
     """Construct a conv2d layer on top of cnn."""
     if input_layer is None:
         input_layer = self.top_layer
     if num_channels_in is None:
         num_channels_in = self.top_size
     if stddev is not None and kernel_initializer is None:
         kernel_initializer = tf.truncated_normal_initializer(stddev=stddev)
     if kernel_initializer is None:
         kernel_initializer = tf.variance_scaling_initializer()
     name = 'conv' + str(self.counts['conv'])
     self.counts['conv'] += 1
     with tf.variable_scope(name):
         strides = [1, d_height, d_width, 1]
         if self.data_format == 'NCHW':
             strides = [strides[0], strides[3], strides[1], strides[2]]
         if mode != 'SAME_RESNET':
             conv = self._conv2d_impl(input_layer,
                                      num_channels_in,
                                      num_out_channels,
                                      kernel_size=[k_height, k_width],
                                      strides=[d_height, d_width],
                                      padding=mode,
                                      kernel_initializer=kernel_initializer)
         else:  # Special padding mode for ResNet models
             if d_height == 1 and d_width == 1:
                 conv = self._conv2d_impl(
                     input_layer,
                     num_channels_in,
                     num_out_channels,
                     kernel_size=[k_height, k_width],
                     strides=[d_height, d_width],
                     padding='SAME',
                     kernel_initializer=kernel_initializer)
             else:
                 rate = 1  # Unused (for 'a trous' convolutions)
                 kernel_height_effective = k_height + (k_height -
                                                       1) * (rate - 1)
                 pad_h_beg = (kernel_height_effective - 1) // 2
                 pad_h_end = kernel_height_effective - 1 - pad_h_beg
                 kernel_width_effective = k_width + (k_width - 1) * (rate -
                                                                     1)
                 pad_w_beg = (kernel_width_effective - 1) // 2
                 pad_w_end = kernel_width_effective - 1 - pad_w_beg
                 padding = [[0, 0], [pad_h_beg, pad_h_end],
                            [pad_w_beg, pad_w_end], [0, 0]]
                 if self.data_format == 'NCHW':
                     padding = [
                         padding[0], padding[3], padding[1], padding[2]
                     ]
                 padded_input_layer = tf.pad(input_layer, padding)
                 conv = self._conv2d_impl(
                     padded_input_layer,
                     num_channels_in,
                     num_out_channels,
                     kernel_size=[k_height, k_width],
                     strides=[d_height, d_width],
                     padding='VALID',
                     kernel_initializer=kernel_initializer)
         if use_batch_norm is None:
             use_batch_norm = self.use_batch_norm
         mlperf.logger.log_conv2d(input_tensor=input_layer,
                                  output_tensor=conv,
                                  stride_height=d_height,
                                  stride_width=d_width,
                                  filters=num_out_channels,
                                  initializer=kernel_initializer,
                                  use_bias=not use_batch_norm
                                  and bias is not None)
         if not use_batch_norm:
             if bias is not None:
                 biases = self.get_variable(
                     'biases', [num_out_channels],
                     self.variable_dtype,
                     self.dtype,
                     initializer=tf.constant_initializer(bias))
                 biased = tf.reshape(
                     tf.nn.bias_add(conv,
                                    biases,
                                    data_format=self.data_format),
                     conv.get_shape())
             else:
                 biased = conv
         else:
             self.top_layer = conv
             self.top_size = num_out_channels
             biased = self.batch_norm(**self.batch_norm_config)
         if activation == 'relu':
             mlperf.logger.log(key=mlperf.tags.MODEL_HP_RELU)
             conv1 = self.relu(biased)
         elif activation == 'linear' or activation is None:
             conv1 = biased
         elif activation == 'tanh':
             conv1 = tf.nn.tanh(biased)
         else:
             raise KeyError('Invalid activation type \'%s\'' % activation)
         self.top_layer = conv1
         self.top_size = num_out_channels
         return conv1
Esempio n. 51
def q_network(state_tensor):
    inputs = state_tensor
    dense_outputs1 = tf.layers.dense(inputs=inputs, units=30, activation=tf.nn.relu, kernel_initializer=tf.variance_scaling_initializer())
    dense_outputs2 = tf.layers.dense(inputs=dense_outputs1, units=30, activation=tf.nn.relu, kernel_initializer=tf.variance_scaling_initializer())
    outputs = tf.layers.dense(inputs=dense_outputs2, units=n_outputs, kernel_initializer=tf.variance_scaling_initializer())
    return outputs
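
A minimal usage sketch for q_network above; TF 1.x graph mode, the state width, and n_outputs are assumptions, not part of the original snippet:

import tensorflow as tf

n_outputs = 4  # assumption: four discrete actions
state_tensor = tf.placeholder(tf.float32, shape=[None, 8], name='state')  # assumed state width
q_values = q_network(state_tensor)
greedy_action = tf.argmax(q_values, axis=1)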
Example no. 52
  def build_network(self, inputs, is_training):
    """Builds the forward pass of the model.

    Args:
      inputs: the list of inputs, excluding labels
      is_training: whether the network is in the training phase.

    Returns:
      The logits of the model.
    """

    def inception_v1(inputs, k, l, m, n, p, q):
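      # k/l/m/n/p/q follow the GoogLeNet notation: #1x1, #3x3-reduce, #3x3,
      # #5x5-reduce, #5x5, and pool-projection output channels, respectively.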
      cols = [[('conv', k, 1, 1)], [('conv', l, 1, 1), ('conv', m, 3, 3)],
              [('conv', n, 1, 1), ('conv', p, 5, 5)],
              [('mpool', 3, 3, 1, 1, 'SAME'), ('conv', q, 1, 1)]]
      return inception_module(inputs, cols, self.channel_pos)

    if self.data_format == 'NCHW':
      inputs = tf.transpose(inputs, [0, 3, 1, 2])

    conv1 = tf.layers.conv2d(
            inputs=inputs,
            filters=64,
            kernel_size=7,
            strides=2,
            padding='same',
            data_format=self.channel_pos,
            activation=tf.nn.relu,
            kernel_initializer=tf.variance_scaling_initializer(),
            bias_initializer=tf.constant_initializer(0.0)
            )
    pool1 = tf.layers.max_pooling2d(
            inputs=conv1,
            pool_size=3,
            strides=2,
            padding='same',
            data_format=self.channel_pos
            )
    conv2 = tf.layers.conv2d(
            inputs=pool1,
            filters=64,
            kernel_size=1,
            strides=1,
            padding='same',
            data_format=self.channel_pos,
            activation=tf.nn.relu,
            kernel_initializer=tf.variance_scaling_initializer(),
            bias_initializer=tf.constant_initializer(0.0)
            )
    conv3 = tf.layers.conv2d(
            inputs=conv2,
            filters=192,
            kernel_size=3,
            strides=1,
            padding='same',
            data_format=self.channel_pos,
            activation=tf.nn.relu,
            kernel_initializer=tf.variance_scaling_initializer(),
            bias_initializer=tf.constant_initializer(0.0)
            )
    pool3 = tf.layers.max_pooling2d(
            inputs=conv3,
            pool_size=3,
            strides=2,
            padding='same',
            data_format=self.channel_pos
            )
    output1 = inception_v1(pool3, 64, 96, 128, 16, 32, 32)
    output2 = inception_v1(output1, 128, 128, 192, 32, 96, 64)
    pool4 = tf.layers.max_pooling2d(
            inputs=output2,
            pool_size=3,
            strides=2,
            padding='same',
            data_format=self.channel_pos
            )
    output3 = inception_v1(pool4, 192, 96, 208, 16, 48, 64)
    output4 = inception_v1(output3, 160, 112, 224, 24, 64, 64)
    output5 = inception_v1(output4, 128, 128, 256, 24, 64, 64)
    output6 = inception_v1(output5, 112, 144, 288, 32, 64, 64)
    output7 = inception_v1(output6, 256, 160, 320, 32, 128, 128)
    pool8 = tf.layers.max_pooling2d(
            inputs=output7,
            pool_size=3,
            strides=2,
            padding='same',
            data_format=self.channel_pos
            )
    output9 = inception_v1(pool8, 256, 160, 320, 32, 128, 128)
    output10 = inception_v1(output9, 384, 192, 384, 48, 128, 128)
    pool11 = tf.layers.average_pooling2d(
            inputs=output10,
            pool_size=7,
            strides=1,
            padding='valid',
            data_format=self.channel_pos
            )
    output12 = tf.reshape(pool11, [-1, 1024])
    stddev = np.sqrt(1.0 / self.num_classes)
    logits = tf.contrib.layers.fully_connected(
            inputs=output12,
            num_outputs=self.num_classes,
            activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(stddev),
            biases_initializer=tf.constant_initializer(0.0)
            )
    return logits
Example no. 53
    def __init__(self, name, input_shape, output_dim, h_size=512, logdir=None):
        """A3C Network tensors and operations are defined here

        Args:
            name (str): The name of scope
            input_shape (list): The shape of input image [H, W, C]
            output_dim (int): Number of actions
            h_size (int): Number of filters in the final conv layer, which is
                split into the policy and value streams
            logdir (str, optional): directory to save summaries

        Notes:
            You should be familiar with Policy Gradients.
            The only difference between vanilla PG and A3C is that there is
            an operation to apply gradients manually.
        """
        self.h_size = h_size
        with tf.variable_scope(name):
            #The network receives a frame from the game, flattened into an array.
            #It is then resized and processed through four convolutional layers.
            self.stateInput = tf.placeholder(tf.float32,
                                             shape=[None, *input_shape],
                                             name='state')
            net = self.stateInput

            #init = tf.random_normal_initializer(mean=0.0, stddev=0.01, dtype=tf.float32)
            init = tf.variance_scaling_initializer(
                scale=2)  # He initialization
            net = tf.layers.conv2d(net,
                                   filters=32,
                                   kernel_size=8,
                                   strides=4,
                                   padding='valid',
                                   kernel_initializer=init,
                                   activation=tf.nn.relu)
            net = tf.layers.conv2d(net,
                                   filters=64,
                                   kernel_size=4,
                                   strides=2,
                                   padding='valid',
                                   kernel_initializer=init,
                                   activation=tf.nn.relu)
            net = tf.layers.conv2d(net,
                                   filters=64,
                                   kernel_size=3,
                                   strides=1,
                                   padding='valid',
                                   kernel_initializer=init,
                                   activation=tf.nn.relu)
            net = tf.layers.conv2d(net,
                                   filters=self.h_size,
                                   kernel_size=7,
                                   strides=1,
                                   padding='valid',
                                   kernel_initializer=init,
                                   activation=tf.nn.relu)

            #We take the output from the final convolutional layer and split it into separate advantage and value streams.

            self.streamAC, self.streamVC = tf.split(
                net, 2, 3)  # (N,1,1,512) --> (N,1,1,256), (N,1,1,256)

            self.streamAC = tf.layers.flatten(self.streamAC)
            self.Policy = tf.clip_by_value(
                tf.layers.dense(self.streamAC,
                                output_dim,
                                use_bias=True,
                                activation=tf.nn.softmax,
                                kernel_initializer=init), 1e-10, 1.)
            self.predict = tf.argmax(self.Policy, 1)

            self.streamVC = tf.layers.flatten(self.streamVC)
            self.Value = tf.layers.dense(self.streamVC,
                                         1,
                                         use_bias=False,
                                         activation=None,
                                         kernel_initializer=init)
            self.Value = tf.squeeze(self.Value)

            #Below we define the A3C loss: a policy-gradient term, an entropy bonus,
            #and a squared-error value loss.
            self.action = tf.placeholder(shape=[None],
                                         dtype=tf.int32,
                                         name='action_input')
            self.actions_onehot = tf.one_hot(self.action,
                                             output_dim,
                                             dtype=tf.float32,
                                             name='action_onehot')
            self.advantage = tf.placeholder(tf.float32,
                                            shape=[None],
                                            name="advantage_input")
            self.reward = tf.placeholder(tf.float32,
                                         shape=[None],
                                         name="reward_input")

            policy_gain = tf.boolean_mask(self.Policy,
                                          tf.cast(self.actions_onehot, tf.bool))
            policy_gain = tf.log(policy_gain) * self.advantage
            policy_gain = tf.reduce_mean(policy_gain, name="policy_gain")

            entropy = -tf.reduce_sum(self.Policy * tf.log(self.Policy), 1)
            entropy = tf.reduce_mean(entropy)

            value_loss = tf.losses.mean_squared_error(self.Value,
                                                      self.reward,
                                                      scope="value_loss")

            # Be careful with the negative sign: the optimizer can only minimize,
            # so we negate the policy gain and the entropy bonus (which encourages exploration)

            self.total_loss = -policy_gain + 0.1 * value_loss - entropy * 0.02
            self.optimizer = tf.train.AdamOptimizer(learning_rate=0.00025)

        var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                     scope=name)
        self.gradients = self.optimizer.compute_gradients(
            self.total_loss, var_list)
        self.gradients_placeholders = []

        for grad, var in self.gradients:
            placeholder = tf.placeholder(var.dtype, shape=var.get_shape())
            placeholder = tf.clip_by_norm(placeholder, 40)
            self.gradients_placeholders.append((placeholder, var))
        self.apply_gradients = self.optimizer.apply_gradients(
            self.gradients_placeholders)

        if logdir:
            loss_summary = tf.summary.scalar("total_loss", self.total_loss)
            value_summary = tf.summary.histogram("values", self.Value)

            self.summary_op = tf.summary.merge([loss_summary, value_summary])
            self.summary_writer = tf.summary.FileWriter(logdir)
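
A sketch of one way these handles could be wired in a worker loop; local_net, global_net, sess, and feed are hypothetical names, and it relies on TF 1.x allowing intermediate tensors to be fed:

# Hypothetical A3C update step (illustrative names, not from the source).
grads = sess.run([grad for grad, _ in local_net.gradients], feed_dict=feed)
feed_grads = {ph: g for (ph, _), g in zip(global_net.gradients_placeholders, grads)}
sess.run(global_net.apply_gradients, feed_dict=feed_grads)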
Example no. 54
    def __init__(
        self,
        observation_spec,
        conv_layer_params=None,
        input_fc_layer_params=(75, 40),
        lstm_size=(40, ),
        output_fc_layer_params=(75, 40),
        activation_fn=tf.keras.activations.relu,
        name='LSTMEncodingNetwork',
    ):
        """Creates an instance of `LSTMEncodingNetwork`.

    Args:
      observation_spec: A nest of `tensor_spec.TensorSpec` representing the
        observations.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      input_fc_layer_params: Optional list of fully connected parameters, where
        each item is the number of units in the layer. These feed into the
        recurrent layer.
      lstm_size: An iterable of ints specifying the LSTM cell sizes to use.
      output_fc_layer_params: Optional list of fully connected parameters, where
        each item is the number of units in the layer. These are applied on top
        of the recurrent layer.
      activation_fn: Activation function, e.g. tf.keras.activations.relu.
      name: A string representing name of the network.
    """
        kernel_initializer = tf.variance_scaling_initializer(
            scale=2.0, mode='fan_in', distribution='truncated_normal')

        input_encoder = encoding_network.EncodingNetwork(
            observation_spec,
            conv_layer_params=conv_layer_params,
            fc_layer_params=input_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer)

        # Create RNN cell
        if len(lstm_size) == 1:
            cell = tf.keras.layers.LSTMCell(lstm_size[0])
        else:
            cell = tf.keras.layers.StackedRNNCells(
                [tf.keras.layers.LSTMCell(size) for size in lstm_size])

        output_encoder = ([
            tf.keras.layers.Dense(num_units,
                                  activation=activation_fn,
                                  kernel_initializer=kernel_initializer,
                                  name='/'.join([name, 'dense']))
            for num_units in output_fc_layer_params
        ])

        state_spec = nest.map_structure(
            functools.partial(tensor_spec.TensorSpec,
                              dtype=tf.float32,
                              name='network_state_spec'), cell.state_size)

        super(LSTMEncodingNetwork,
              self).__init__(observation_spec=observation_spec,
                             action_spec=None,
                             state_spec=state_spec,
                             name=name)

        self._conv_layer_params = conv_layer_params
        self._input_encoder = input_encoder
        self._dynamic_unroll = dynamic_unroll_layer.DynamicUnroll(cell)
        self._output_encoder = output_encoder
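
A hedged instantiation sketch; the observation shape and layer sizes are assumptions, and tensor_spec is the same module the class itself uses:

import tensorflow as tf

obs_spec = tensor_spec.TensorSpec([10], tf.float32, name='observation')  # assumed flat observation
net = LSTMEncodingNetwork(obs_spec, input_fc_layer_params=(75, 40), lstm_size=(40,))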
Example no. 55
    def inference(self, inputs, trainable=True):
        with tf.name_scope('norm'):
            inputs = tf.div(tf.cast(inputs, tf.float32), 255.0)
            images = tf.split(inputs, [1, 1, 1, 1], axis=3)
            for i in range(4):
                tf.summary.image('input_images_%d' % i, images[i])

        def act(inputs):
            return tf.nn.leaky_relu(inputs, alpha=0.01)

        initializer = tf.variance_scaling_initializer()
        with tf.name_scope('conv1'):
            conv = tf.contrib.layers.conv2d(inputs,
                                            32,
                                            stride=4,
                                            kernel_size=8,
                                            activation_fn=act,
                                            trainable=trainable,
                                            padding='SAME',
                                            weights_initializer=initializer)

        with tf.name_scope('conv2'):
            conv = tf.contrib.layers.conv2d(conv,
                                            64,
                                            stride=2,
                                            kernel_size=4,
                                            activation_fn=act,
                                            trainable=trainable,
                                            padding='SAME',
                                            weights_initializer=initializer)

        with tf.name_scope('conv3'):
            conv = tf.contrib.layers.conv2d(conv,
                                            64,
                                            stride=1,
                                            kernel_size=3,
                                            activation_fn=act,
                                            trainable=trainable,
                                            padding='SAME',
                                            weights_initializer=initializer)

        with tf.name_scope('fully_connected'):
            flatten = tf.contrib.layers.flatten(conv)
            fc = tf.contrib.layers.fully_connected(
                flatten,
                512,
                trainable=trainable,
                activation_fn=act,
                weights_initializer=initializer)

        with tf.name_scope('output'):
            w = tf.get_variable('ow',
                                shape=[512, self.config.action_size],
                                trainable=trainable,
                                initializer=initializer)
            b = tf.get_variable('ob',
                                shape=[self.config.action_size],
                                trainable=trainable,
                                initializer=tf.zeros_initializer())
            outputs = tf.add(tf.matmul(fc, w), b, name='q_values')
        return outputs
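
A usage sketch under assumptions: 84x84 frames with exactly four stacked channels (the tf.split above requires four), and a hypothetical model instance wrapping this class:

frames = tf.placeholder(tf.uint8, shape=[None, 84, 84, 4], name='frames')
q_values = model.inference(frames)  # hypothetical instance of the class above
greedy_action = tf.argmax(q_values, axis=1)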
Example no. 56
def init_hidden_uniform():
    return tf.variance_scaling_initializer(scale=1.0 / 3.0,
                                           mode="fan_in",
                                           distribution="uniform")
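
With scale=1/3, mode="fan_in", and a uniform distribution, samples fall in [-limit, limit] where limit = sqrt(3 * scale / fan_in) = sqrt(1 / fan_in). A quick TF 1.x check (fan_in here is an arbitrary example value):

import numpy as np
import tensorflow as tf

fan_in = 64
init = init_hidden_uniform()
with tf.Session() as sess:
    w = sess.run(init([fan_in, 32]))
limit = np.sqrt(1.0 / fan_in)
assert -limit <= w.min() and w.max() <= limit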
Example no. 57
def fpn_model(features):
    """
    Args:
        features ([tf.Tensor]): ResNet features c2-c5

    Returns:
        [tf.Tensor]: FPN features p2-p6
    """
    assert len(features) == 4, features
    num_channel = cfg.FPN.NUM_CHANNEL

    use_gn = cfg.FPN.NORM == "GN"

    def upsample2x(name, x):
        return FixedUnPooling(
            name, x, 2, unpool_mat=np.ones((2, 2), dtype="float32"), data_format="channels_first"
        )

        # tf.image.resize is, again, not aligned.
        # with tf.name_scope(name):
        #     shape2d = tf.shape(x)[2:]
        #     x = tf.transpose(x, [0, 2, 3, 1])
        #     x = tf.image.resize_nearest_neighbor(x, shape2d * 2, align_corners=True)
        #     x = tf.transpose(x, [0, 3, 1, 2])
        #     return x

    with argscope(
        Conv2D,
        data_format="channels_first",
        activation=tf.identity,
        use_bias=True,
        kernel_initializer=tf.variance_scaling_initializer(scale=1.0),
    ):
        lat_2345 = [
            Conv2D("lateral_1x1_c{}".format(i + 2), c, num_channel, 1)
            for i, c in enumerate(features)
        ]
        if use_gn:
            lat_2345 = [GroupNorm("gn_c{}".format(i + 2), c) for i, c in enumerate(lat_2345)]
        lat_sum_5432 = []
        for idx, lat in enumerate(lat_2345[::-1]):
            if idx == 0:
                lat_sum_5432.append(lat)
            else:
                lat = lat + upsample2x("upsample_lat{}".format(6 - idx), lat_sum_5432[-1])
                lat_sum_5432.append(lat)
        p2345 = [
            Conv2D("posthoc_3x3_p{}".format(i + 2), c, num_channel, 3)
            for i, c in enumerate(lat_sum_5432[::-1])
        ]
        if use_gn:
            p2345 = [GroupNorm("gn_p{}".format(i + 2), c) for i, c in enumerate(p2345)]
        p6 = MaxPooling(
            "maxpool_p6",
            p2345[-1],
            pool_size=1,
            strides=2,
            data_format="channels_first",
            padding="VALID",
        )
        return p2345 + [p6]
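
A minimal call sketch; c2..c5 are hypothetical NCHW ResNet feature maps at strides 4, 8, 16, and 32:

p2, p3, p4, p5, p6 = fpn_model([c2, c3, c4, c5])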
Example no. 58
    def __call__(self,
                 inputs,
                 inputs_unpadded_length,
                 former_encoder_input,
                 multi_attention_bias,
                 targets=None):
        """Calculate target logits or inferred target sequences.

    Args:
      inputs: int tensor with shape [batch_size, input_length, hidden_size].
      inputs_unpadded_length: int tensor with shape [batch_size, ]. Indicate the actual length of each input.
      targets: None or int tensor with shape [batchCalculate target logits or inferred target sequences.

    Args:
      inputs: int tensor with shape [batch_size, input_length, hidden_size].
      inputs_unpadded_length: int tensor with shape [batch_size, ]. Indicate the actual length of each input.
      targets: None or int tensor with shape [batch_size, target_length].

    Returns:
      If targets is defined, then return logits for each word in the target
      sequence. float tensor with shape [batch_size, target_length, vocab_size]
      If target is none, then generate output sequence one token at a time.
        returns a dictionary {
          output: [batch_size, decoded length]
          score: [batch_size, float]}
    _size, target_length].

    Returns:
      If targets is defined, then return logits for each word in the target
      sequence. float tensor with shape [batch_size, target_length, vocab_size]
      If target is none, then generate output sequence one token at a time.
        returns a dictionary {
          output: [batch_size, decoded length]
          score: [batch_size, float]}
    """
        # Variance scaling is used here because it seems to work in many problems.
        # Other reasonable initializers may also work just as well.
        initializer = tf.variance_scaling_initializer(
            self.params["initializer_gain"],
            mode="fan_avg",
            distribution="uniform")
        with tf.variable_scope(self.scope, initializer=initializer):
            # Calculate attention bias for encoder self-attention and decoder
            # multi-headed attention layers.
            attention_bias = model_utils.get_padding_bias(
                inputs, inputs_unpadded_length)

            # Run the inputs through the encoder layer to map the symbol
            # representations to continuous representations.
            encoder_outputs = self.encode(inputs, attention_bias,
                                          inputs_unpadded_length,
                                          former_encoder_input,
                                          multi_attention_bias)

            # Generate output sequence if targets is None, or return logits if target
            # sequence is known.
            if targets is None:
                return self.predict(encoder_outputs, attention_bias)
            else:
                logits = self.decode(targets, encoder_outputs, attention_bias)
                return logits
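
A hedged call sketch based on the signature and docstring above; every tensor name here is a placeholder, and model is a hypothetical instance:

# Training mode: returns logits of shape [batch_size, target_length, vocab_size].
logits = model(inputs, inputs_unpadded_length, former_encoder_input,
               multi_attention_bias, targets=targets)
# Inference mode (targets=None): returns the {output, score} dictionary.
preds = model(inputs, inputs_unpadded_length, former_encoder_input,
              multi_attention_bias)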
Example no. 59
# Model architecture parameters
n_stocks = 500
n_neurons_1 = 1024
n_neurons_2 = 512
n_neurons_3 = 256
n_neurons_4 = 128
n_target = 1

# Placeholder
X = tf.placeholder(dtype=tf.float32, shape=[None, n_stocks])
Y = tf.placeholder(dtype=tf.float32, shape=[None])

# Initializers
sigma = 1
weight_initializer = tf.variance_scaling_initializer(mode="fan_avg",
                                                     distribution="uniform",
                                                     scale=sigma)
bias_initializer = tf.zeros_initializer()

# Layer 1: Variables for hidden weights and biases
W_hidden_1 = tf.Variable(weight_initializer([n_stocks, n_neurons_1]))
bias_hidden_1 = tf.Variable(bias_initializer([n_neurons_1]))

# Layer 2: Variables for hidden weights and biases
W_hidden_2 = tf.Variable(weight_initializer([n_neurons_1, n_neurons_2]))
bias_hidden_2 = tf.Variable(bias_initializer([n_neurons_2]))

# Layer 3: Variables for hidden weights and biases
W_hidden_3 = tf.Variable(weight_initializer([n_neurons_2, n_neurons_3]))
bias_hidden_3 = tf.Variable(bias_initializer([n_neurons_3]))
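
The excerpt stops after the layer-3 variables; a forward-pass sketch, assuming ReLU hidden activations (a common choice here, not stated in the excerpt):

hidden_1 = tf.nn.relu(tf.add(tf.matmul(X, W_hidden_1), bias_hidden_1))
hidden_2 = tf.nn.relu(tf.add(tf.matmul(hidden_1, W_hidden_2), bias_hidden_2))
hidden_3 = tf.nn.relu(tf.add(tf.matmul(hidden_2, W_hidden_3), bias_hidden_3))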
Example no. 60
def fpn_model(features):
    """
    Args:
        features ([tf.Tensor]): ResNet features c2-c5

    Returns:
        [tf.Tensor]: FPN features p2-p6
    """
    assert len(features) == 4, features
    num_channel = cfg.FPN.NUM_CHANNEL

    use_gn = cfg.FPN.NORM == 'GN'

    def upsample2x(name, x):
        try:
            resize = tf.compat.v2.image.resize_images
            with tf.name_scope(name):
                shp2d = tf.shape(x)[2:]
                x = tf.transpose(x, [0, 2, 3, 1])
                x = resize(x, shp2d * 2, 'nearest')
                x = tf.transpose(x, [0, 3, 1, 2])
                return x
        except AttributeError:
            return FixedUnPooling(name,
                                  x,
                                  2,
                                  unpool_mat=np.ones((2, 2), dtype='float32'),
                                  data_format='channels_first')

    with argscope(
            Conv2D,
            data_format='channels_first',
            activation=tf.identity,
            use_bias=True,
            kernel_initializer=tf.variance_scaling_initializer(scale=1.)):
        lat_2345 = [
            Conv2D('lateral_1x1_c{}'.format(i + 2), c, num_channel, 1)
            for i, c in enumerate(features)
        ]
        if use_gn:
            lat_2345 = [
                GroupNorm('gn_c{}'.format(i + 2), c)
                for i, c in enumerate(lat_2345)
            ]
        lat_sum_5432 = []
        for idx, lat in enumerate(lat_2345[::-1]):
            if idx == 0:
                lat_sum_5432.append(lat)
            else:
                lat = lat + upsample2x('upsample_lat{}'.format(6 - idx),
                                       lat_sum_5432[-1])
                lat_sum_5432.append(lat)
        p2345 = [
            Conv2D('posthoc_3x3_p{}'.format(i + 2), c, num_channel, 3)
            for i, c in enumerate(lat_sum_5432[::-1])
        ]
        if use_gn:
            p2345 = [
                GroupNorm('gn_p{}'.format(i + 2), c)
                for i, c in enumerate(p2345)
            ]
        p6 = MaxPooling('maxpool_p6',
                        p2345[-1],
                        pool_size=1,
                        strides=2,
                        data_format='channels_first',
                        padding='VALID')
        return p2345 + [p6]
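
For intuition, both upsampling paths in upsample2x amount to 2x nearest-neighbor replication; a NumPy sketch of the equivalence (illustrative only):

import numpy as np

x = np.arange(4, dtype='float32').reshape(1, 1, 2, 2)    # NCHW toy input
up = np.kron(x, np.ones((1, 1, 2, 2), dtype='float32'))  # each value tiled into a 2x2 block
print(up.shape)  # (1, 1, 4, 4)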