Example #1
File: model.py  Project: hfxunlp/ATR
    def forward(self, inputs, lengths=None, state=None):

        if lengths is None:
            # no lengths given: assume no padding, every sequence is full length
            lengths = inputs.new_ones(inputs.size())
            lengths = lengths.sum(1)

        word_embed = self.embedding(inputs)
        word_embed = util.apply_dropout(word_embed, self.dropout,
                                        self.training)

        outputs = pack_padded_sequence(word_embed, lengths, batch_first=True)
        hs = []
        for l in range(self.num_layers):
            # extract this layer's slice of the initial states (may be None)
            init_state = None if state is None else tuple(
                s[l:l + 1] for s in state)
            # we simply stack multiple RNN layers
            outputs, h = self.rnn[l](outputs, init_state=init_state)
            hs.append(h)

        outputs, lengths = pad_packed_sequence(outputs, batch_first=True)

        if self.project:
            outputs = self.h2h(outputs)
        logits = self.h2o(outputs)

        outputs_size = outputs.size()
        logits = logits.view(outputs_size[0], outputs_size[1], logits.size(-1))

        # stack per-layer final (h, c) into [num_layers, batch, dim]
        state = tuple(torch.cat(vs, 0) for vs in zip(*hs))

        return logits, state
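A minimal usage sketch for this stacked-RNN forward pass. The enclosing class is not shown in the example, so the name RNNLM and its constructor arguments are assumptions for illustration:

import torch

model = RNNLM(vocab_size=10000, hidden_size=256, num_layers=2)  # hypothetical

inputs = torch.randint(0, 10000, (8, 20))  # [batch, length] token ids
logits, state = model(inputs)              # lengths and state default to None
# logits: [batch, length, vocab]; state: a (h, c) pair, each of shape
# [num_layers, batch, hidden_size]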
Example #2
def basic_block(inputs: tf.Tensor, filters: int, strides: int, l2: float,
                dropout_rate: float, use_mc_dropout: bool,
                conv_layer: tf.keras.layers.Layer):
    """Basic residual block of two 3x3 convs.

  Args:
    inputs: tf.Tensor.
    filters: Number of filters for Conv2D.
    strides: Stride dimensions for Conv2D.
    l2: L2 regularization coefficient.
    dropout_rate: Dropout rate.
    use_mc_dropout: Whether to apply Monte Carlo dropout.
    conv_layer: (tf.keras.layers.Layer) The conv layer used.

  Returns:
    tf.Tensor.
  """
    x = inputs
    y = inputs
    y = BatchNormalization(beta_regularizer=tf.keras.regularizers.l2(l2),
                           gamma_regularizer=tf.keras.regularizers.l2(l2))(y)
    y = tf.keras.layers.Activation('relu')(y)
    y = models_util.apply_dropout(y, dropout_rate, use_mc_dropout)

    y = conv_layer(filters=filters,
                   strides=strides,
                   kernel_regularizer=tf.keras.regularizers.l2(l2))(y)
    y = BatchNormalization(beta_regularizer=tf.keras.regularizers.l2(l2),
                           gamma_regularizer=tf.keras.regularizers.l2(l2))(y)
    y = tf.keras.layers.Activation('relu')(y)
    y = models_util.apply_dropout(y, dropout_rate, use_mc_dropout)

    y = conv_layer(filters=filters,
                   strides=1,
                   kernel_regularizer=tf.keras.regularizers.l2(l2))(y)
    if not x.shape.is_compatible_with(y.shape):
        x = conv_layer(filters=filters,
                       kernel_size=1,
                       strides=strides,
                       kernel_regularizer=tf.keras.regularizers.l2(l2))(x)
        # dropout follows the 1x1 shortcut projection, mirroring the
        # conv -> dropout pattern of the residual branch
        x = models_util.apply_dropout(x, dropout_rate, use_mc_dropout)

    x = tf.keras.layers.add([x, y])
    return x
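A hedged sketch of wiring basic_block into a functional Keras graph, reusing the models_util.make_conv2d_layer factory the way the wide_resnet example below does; the hyperparameter values are illustrative only:

import tensorflow as tf

conv_layer = models_util.make_conv2d_layer(kernel_size=3,
                                           use_bias=False,
                                           kernel_initializer='he_normal',
                                           activation=None,
                                           use_spec_norm=False,
                                           spec_norm_bound=None,
                                           spec_norm_iteration=None)
inputs = tf.keras.Input(shape=(32, 32, 16))
outputs = basic_block(inputs, filters=32, strides=2, l2=1e-4,
                      dropout_rate=0.1, use_mc_dropout=False,
                      conv_layer=conv_layer)
# the stride-2 block halves the spatial size, so outputs is [None, 16, 16, 32]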
Example #3
    def forward(self, inputs, init_state=None):
        # inputs: PackedSequence whose padded shape is [batch, length, dim]
        # init_state: ([1, batch, dim], [1, batch, dim])
        # only valid for a single-layer LSTM

        seq_tensor, seq_lengths = pad_packed_sequence(inputs,
                                                      batch_first=True)
        batch_size, time_steps = seq_tensor.size()[:2]

        if init_state is None:
            prev_h = seq_tensor.new_zeros(batch_size, self.hidden_size)
            prev_c = seq_tensor.new_zeros(batch_size, self.hidden_size)
        else:
            prev_h = init_state[0].squeeze(0)
            prev_c = init_state[1].squeeze(0)

        # pack_padded_sequence yields lengths sorted in decreasing order, so
        # the number of still-active sequences only shrinks as steps advance
        valid_batch_index = batch_size - 1
        outputs = seq_tensor.new_zeros(batch_size, time_steps, self.hidden_size)

        for step in range(time_steps):

            index = step

            # 1. generate the valid batch index
            while seq_lengths[valid_batch_index] <= index:
                valid_batch_index -= 1

            # 2. extract rnn input and previous states
            t = valid_batch_index + 1
            x = seq_tensor[:t, index]
            h_ = prev_h[:t].clone()
            c_ = prev_c[:t].clone()

            # input, forget, output gates and candidate cell in one projection
            preact = self.i2h(x) + self.h2h(h_)

            i, f, o, g = preact.split(self.hidden_size, dim=-1)

            c = c_ * f.sigmoid() + i.sigmoid() * g.tanh()

            h = o.sigmoid() * c.tanh()

            # recreate the state buffers detached from the old graph; the
            # slice assignment below re-attaches the active prefix through h/c
            prev_h = prev_h.detach().clone()
            prev_c = prev_c.detach().clone()
            prev_h[:t] = h
            prev_c[:t] = c
            outputs[:t, index] = h

        outputs = util.apply_dropout(outputs, self.dropout, self.training)
        outputs = pack_padded_sequence(outputs, seq_lengths, batch_first=True)

        # [num_layers * num_directions, batch, dim]
        final_state = (prev_h.unsqueeze(0), prev_c.unsqueeze(0))

        return outputs, final_state
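A minimal usage sketch; only the forward method appears in this example, so the class name StepLSTM and its constructor are assumptions:

import torch
from torch.nn.utils.rnn import pack_padded_sequence

rnn = StepLSTM(input_size=128, hidden_size=256)  # hypothetical constructor

x = torch.randn(4, 12, 128)             # [batch, length, dim]
lengths = torch.tensor([12, 10, 7, 3])  # sorted in decreasing order, as the
                                        # valid_batch_index scan assumes
packed = pack_padded_sequence(x, lengths, batch_first=True)

outputs, (h, c) = rnn(packed)           # h and c: [1, batch, hidden_size]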
Example #4

def create_model(batch_size: int,
                 len_seqs: int,
                 num_motifs: int,
                 len_motifs: int,
                 num_denses: int,
                 num_classes: int = 10,
                 embed_size: int = 4,
                 one_hot: bool = True,
                 l2_weight: float = 0.0,
                 dropout_rate: float = 0.1,
                 before_conv_dropout: bool = False,
                 use_mc_dropout: bool = False,
                 spec_norm_hparams: Optional[Dict[str, Any]] = None,
                 gp_layer_hparams: Optional[Dict[str, Any]] = None,
                 **unused_kwargs: Dict[str, Any]) -> tf.keras.models.Model:
  """Builds Genomics CNN model.

  Args:
    batch_size: (int) Value of the static per_replica batch size.
    len_seqs: (int) Sequence length.
    num_motifs: (int) Number of motifs (= number of filters) to apply to input.
    len_motifs: (int) Length of the motifs (= size of convolutional filters).
    num_denses: (int) Number of nodes in the dense layer.
    num_classes: (int) Number of output classes.
    embed_size: (int) Static size of hidden dimension of the embedding output.
    one_hot: (bool) If using one hot encoding to encode input sequences.
    l2_weight: (float) L2 regularization coefficient.
    dropout_rate: (float) Fraction of the convolutional output units and dense
      layer output units to drop.
    before_conv_dropout: (bool) Whether to use filter wise dropout before the
      convolutional layer.
    use_mc_dropout: (bool) Whether to apply Monte Carlo dropout.
    spec_norm_hparams: (dict) Hyperparameters for spectral normalization.
    gp_layer_hparams: (dict) Hyperparameters for Gaussian Process output layer.
    **unused_kwargs: (dict) Unused keyword arguments that will be ignored by the
      model.

  Returns:
    (tf.keras.Model) The 1D convolutional model for genomic sequences.
  """
  # define layers
  if spec_norm_hparams:
    spec_norm_bound = spec_norm_hparams['spec_norm_bound']
    spec_norm_iteration = spec_norm_hparams['spec_norm_iteration']
  else:
    spec_norm_bound = None
    spec_norm_iteration = None

  conv_layer = models_util.make_conv2d_layer(
      use_spec_norm=(spec_norm_hparams is not None),
      spec_norm_bound=spec_norm_bound,
      spec_norm_iteration=spec_norm_iteration)

  dense_layer = models_util.make_dense_layer(
      use_spec_norm=(spec_norm_hparams is not None),
      spec_norm_bound=spec_norm_bound,
      spec_norm_iteration=spec_norm_iteration)

  output_layer = models_util.make_output_layer(
      gp_layer_hparams=gp_layer_hparams)

  # compute outputs given inputs
  inputs = tf.keras.Input(
      shape=[len_seqs], batch_size=batch_size, dtype=tf.int32)
  x = _input_embedding(
      inputs, VOCAB_SIZE, one_hot=one_hot, embed_size=embed_size)

  # filter-wise dropout before conv,
  # x.shape=[batch_size, len_seqs, vocab_size/embed_size]
  if before_conv_dropout:
    x = models_util.apply_dropout(
        x,
        dropout_rate,
        use_mc_dropout,
        filter_wise_dropout=True,
        name='conv_dropout')

  x = _conv_pooled_block(
      x,
      conv_layer=conv_layer(
          filters=num_motifs,
          kernel_size=(len_motifs, embed_size),
          strides=(1, 1),
          kernel_regularizer=tf.keras.regularizers.l2(l2_weight),
          name='conv'))
  x = models_util.apply_dropout(
      x, dropout_rate, use_mc_dropout, name='dropout1')
  x = dense_layer(
      units=num_denses,
      activation=tf.keras.activations.relu,
      kernel_regularizer=tf.keras.regularizers.l2(l2_weight),
      name='dense')(
          x)
  x = models_util.apply_dropout(
      x, dropout_rate, use_mc_dropout, name='dropout2')
  if gp_layer_hparams and gp_layer_hparams['gp_input_dim'] > 0:
    # Uses random projection to reduce the input dimension of the GP layer.
    x = tf.keras.layers.Dense(
        gp_layer_hparams['gp_input_dim'],
        kernel_initializer='random_normal',
        use_bias=False,
        trainable=False,
        name='gp_random_projection')(
            x)
  outputs = output_layer(num_classes, name='logits')(x)
  return tf.keras.Model(inputs=inputs, outputs=outputs)
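A short usage sketch for create_model; the hyperparameter values below are illustrative, not the ones used in the project:

import tensorflow as tf

model = create_model(batch_size=64,
                     len_seqs=250,
                     num_motifs=128,
                     len_motifs=20,
                     num_denses=64,
                     num_classes=10)
seqs = tf.random.uniform([64, 250], maxval=4, dtype=tf.int32)  # e.g. ACGT ids
logits = model(seqs)  # [64, num_classes]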
Example #5
def wide_resnet(batch_size: Optional[int],
                input_shape: Iterable[int],
                depth: int,
                width_multiplier: int,
                num_classes: int,
                l2: float,
                dropout_rate: float,
                use_mc_dropout: bool,
                spec_norm_hparams: Optional[Dict[str, Any]] = None,
                gp_layer_hparams: Optional[Dict[str, Any]] = None):
    """Builds Wide ResNet.

    Following Zagoruyko and Komodakis (2016), it accepts a width multiplier on
    the number of filters. Using three groups of residual blocks, the network
    maps spatial features of size 32x32 -> 16x16 -> 8x8.

    Args:
      batch_size: (int) Value of the static per_replica batch size.
      input_shape: (Iterable[int]) Shape of the input to the model.
      depth: Total number of convolutional layers. "n" in WRN-n-k. It differs
        from He et al. (2015)'s notation, which uses the maximum depth of the
        network counting non-conv layers like dense.
      width_multiplier: Integer to multiply the number of typical filters by.
        "k" in WRN-n-k.
      num_classes: Number of output classes.
      l2: L2 regularization coefficient.
      dropout_rate: Dropout rate.
      use_mc_dropout: Whether to apply Monte Carlo dropout.
      spec_norm_hparams: (dict) Hyperparameters for spectral normalization.
      gp_layer_hparams: (dict) Hyperparameters for the Gaussian process output
        layer.

    Returns:
      tf.keras.Model.
    """
    if (depth - 4) % 6 != 0:
        raise ValueError('depth should be 6n+4 (e.g., 16, 22, 28, 40).')
    num_blocks = (depth - 4) // 6
    inputs = tf.keras.layers.Input(shape=input_shape, batch_size=batch_size)

    # pylint: disable=invalid-name
    if spec_norm_hparams:
        spec_norm_bound = spec_norm_hparams['spec_norm_bound']
        spec_norm_iteration = spec_norm_hparams['spec_norm_iteration']
    else:
        spec_norm_bound = None
        spec_norm_iteration = None
    conv2d = models_util.make_conv2d_layer(
        kernel_size=3,
        use_bias=False,
        kernel_initializer='he_normal',
        activation=None,
        use_spec_norm=(spec_norm_hparams is not None),
        spec_norm_bound=spec_norm_bound,
        spec_norm_iteration=spec_norm_iteration)

    x = conv2d(filters=16,
               strides=1,
               kernel_regularizer=tf.keras.regularizers.l2(l2))(inputs)
    x = models_util.apply_dropout(x,
                                  dropout_rate,
                                  use_mc_dropout,
                                  filter_wise_dropout=True)

    x = group(x,
              filters=16 * width_multiplier,
              strides=1,
              num_blocks=num_blocks,
              l2=l2,
              dropout_rate=dropout_rate,
              use_mc_dropout=use_mc_dropout,
              conv_layer=conv2d)
    x = group(x,
              filters=32 * width_multiplier,
              strides=2,
              num_blocks=num_blocks,
              l2=l2,
              dropout_rate=dropout_rate,
              use_mc_dropout=use_mc_dropout,
              conv_layer=conv2d)
    x = group(x,
              filters=64 * width_multiplier,
              strides=2,
              num_blocks=num_blocks,
              l2=l2,
              dropout_rate=dropout_rate,
              use_mc_dropout=use_mc_dropout,
              conv_layer=conv2d)
    x = BatchNormalization(beta_regularizer=tf.keras.regularizers.l2(l2),
                           gamma_regularizer=tf.keras.regularizers.l2(l2))(x)
    x = tf.keras.layers.Activation('relu')(x)
    x = tf.keras.layers.AveragePooling2D(pool_size=8)(x)
    x = tf.keras.layers.Flatten()(x)

    if gp_layer_hparams:
        # Gaussian process output layer, optionally preceded by a fixed
        # random projection that reduces the GP input dimension
        gp_output_layer = functools.partial(
            ed.layers.RandomFeatureGaussianProcess,
            num_inducing=gp_layer_hparams['gp_hidden_dim'],
            gp_kernel_scale=gp_layer_hparams['gp_scale'],
            gp_output_bias=gp_layer_hparams['gp_bias'],
            normalize_input=gp_layer_hparams['gp_input_normalization'],
            gp_cov_momentum=gp_layer_hparams['gp_cov_discount_factor'],
            gp_cov_ridge_penalty=gp_layer_hparams['gp_cov_ridge_penalty'])
        if gp_layer_hparams['gp_input_dim'] > 0:
            x = tf.keras.layers.Dense(gp_layer_hparams['gp_input_dim'],
                                      kernel_initializer='random_normal',
                                      use_bias=False,
                                      trainable=False)(x)
        logits, covmat = gp_output_layer(num_classes)(x)
    else:
        logits = tf.keras.layers.Dense(
            num_classes,
            kernel_initializer='he_normal',
            kernel_regularizer=tf.keras.regularizers.l2(l2),
            bias_regularizer=tf.keras.regularizers.l2(l2))(x)
        covmat = tf.eye(batch_size)

    return tf.keras.Model(inputs=inputs, outputs=[logits, covmat])
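Finally, a hedged construction sketch for a WRN-28-10 on CIFAR-sized inputs; the values are illustrative:

import tensorflow as tf

model = wide_resnet(batch_size=128,
                    input_shape=(32, 32, 3),
                    depth=28,              # 6n+4 with n=4 blocks per group
                    width_multiplier=10,
                    num_classes=10,
                    l2=3e-4,
                    dropout_rate=0.1,
                    use_mc_dropout=False)
logits, covmat = model(tf.random.normal([128, 32, 32, 3]))
# without gp_layer_hparams, covmat is simply tf.eye(batch_size)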