Example #1
def conv2d(x,
           name,
           filter_size,
           in_channels,
           out_channels,
           strides,
           bias=True):
    """2D convolution."""
    with tf.variable_scope(name):
        kernel = tf.get_variable(
            name='DW',
            shape=[filter_size[0], filter_size[1], in_channels, out_channels],
            dtype=tf.float32,
            initializer=tf.initializers.glorot_uniform())
        if bias:
            b = tf.get_variable(name='bias',
                                shape=[out_channels],
                                dtype=tf.float32,
                                initializer=tf.constant_initializer(0.0))
        out = tf.nn.conv2d(x,
                           kernel, [1, strides[0], strides[1], 1],
                           padding='SAME')
        if bias:
            out = tf.nn.bias_add(out, b)
        return out
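A minimal usage sketch in TF 1.x graph mode (input shapes, names, and the session run are illustrative, not part of the original):

import numpy as np
import tensorflow as tf  # assumes TF 1.x (tf.variable_scope / tf.get_variable)

# NHWC input: a batch of 8 RGB images of size 32x32.
x = tf.placeholder(tf.float32, shape=[8, 32, 32, 3])
y = conv2d(x, name='conv1', filter_size=(3, 3), in_channels=3,
           out_channels=16, strides=(1, 1))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(y, feed_dict={x: np.zeros((8, 32, 32, 3), np.float32)})
    print(out.shape)  # (8, 32, 32, 16): 'SAME' padding and stride 1 keep H, W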
Example #2
    def __init__(self, config, **kwargs):
        super().__init__(**kwargs)
        self.config = config
        config = self.config
        model_config = config['model']['net']['structure']
        self.num_classes = config['data']['task']['classes']['num']
        self.vocab_size = config['data']['task']['text']['vocab_size']
        self.max_text_len = config['data']['task']['text']['max_text_len']
        self.use_pretrained_embedding = config['model']['use_pre_train_emb']
        self.embedding_size = model_config['embedding_size']
        self.hidden_dim = model_config['hidden_dim']
        self.head_num = model_config['head_num']
        self.inner_size = model_config['inner_size']
        self.dropout_rate = model_config['dropout_rate']
        self.speech_dropout_rate = model_config['speech_dropout_rate']
        self.padding_id = model_config.get('padding_id', 0)
        self.speech_dense_act = config.get('speech_dense_act', 'relu')

        if self.use_pretrained_embedding:
            self.embedding_path = config['model']['embedding_path']
            logging.info("Loading embedding file from: {}".format(
                self.embedding_path))
            self._word_embedding_init = pickle.load(
                open(self.embedding_path, 'rb'))
            self.embed_initializer = tf.constant_initializer(
                self._word_embedding_init)
        else:
            self.embed_initializer = tf.random_uniform_initializer(-0.1, 0.1)

        self.embed = tf.keras.layers.Embedding(
            self.vocab_size,
            self.embedding_size,
            embeddings_initializer=self.embed_initializer)
        self.speech_enc_layer = delta.layers.RnnEncoder(config,
                                                        name="speech_encoder")
        self.text_enc_layer = delta.layers.RnnEncoder(config,
                                                      name="text_encoder")

        self.align_attn_layer = delta.layers.MultiHeadAttention(
            self.hidden_dim, self.head_num)
        self.align_enc_layer = delta.layers.RnnAttentionEncoder(
            config, name="align_encoder")

        self.embed_d = tf.keras.layers.Dropout(self.dropout_rate)
        self.speech_d = tf.keras.layers.Dropout(self.speech_dropout_rate)
        self.speech_enc_d = tf.keras.layers.Dropout(self.speech_dropout_rate)
        self.text_enc_d = tf.keras.layers.Dropout(self.dropout_rate)
        self.attn_enc_d = tf.keras.layers.Dropout(self.dropout_rate)
        self.align_enc_d = tf.keras.layers.Dropout(self.dropout_rate)
        self.final_dense = tf.keras.layers.Dense(
            self.num_classes, activation=tf.keras.activations.linear)

        self.align_layer_norm = tf.keras.layers.LayerNormalization(
            epsilon=1e-6)
        self.speech_dense = tf.keras.layers.Dense(
            512, activation=self.speech_dense_act)
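For reference, a minimal config covering every key this constructor reads directly (values are illustrative; the delta encoder layers receive the whole config and may read additional keys):

config = {
    'model': {
        'use_pre_train_emb': False,  # if True, also set 'embedding_path'
        'net': {
            'structure': {
                'embedding_size': 200,
                'hidden_dim': 512,
                'head_num': 8,
                'inner_size': 2048,
                'dropout_rate': 0.1,
                'speech_dropout_rate': 0.1,
                'padding_id': 0,  # optional; defaults to 0
            },
        },
    },
    'data': {
        'task': {
            'classes': {'num': 4},
            'text': {'vocab_size': 30000, 'max_text_len': 100},
        },
    },
    'speech_dense_act': 'relu',  # optional; note this one is a top-level key
}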
Example #3
def prelu_layer(self, x, name, num_parameters=1, init=0.25):
    """PReLU: max(0, x) + alpha * min(0, x), with alpha learned."""
    if num_parameters == 1:
        shape = 1
    else:
        # One alpha per channel (the last dimension of x).
        shape = x.get_shape()[-1]
    alpha = tf.get_variable(name,
                            shape=shape,
                            dtype=x.dtype,
                            initializer=tf.constant_initializer(init))
    return tf.maximum(0.0, x) + alpha * tf.minimum(0.0, x)
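A quick sketch of calling it with one learned alpha per channel (the model instance and input are hypothetical):

# x: [batch, 128] activations; num_parameters=128 gives one alpha per channel.
x = tf.placeholder(tf.float32, shape=[None, 128])
with tf.variable_scope('block1'):
    y = model.prelu_layer(x, 'prelu_alpha', num_parameters=128)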
Example #4
def tdnn(x,
         name,
         in_dim,
         context,
         out_dim,
         has_bias=True,
         method='splice_layer'):
    '''TDNN implementation.

    Args:
      context:
        an int giving the symmetric left/right context, or
        a list of context indexes, e.g. (-2, 0, 2).
      method:
        splice_layer: use column-first patch-based copy.
        splice_op: use row-first while_loop copy.
        conv1d: use conv1d as a TDNN equivalent.
    '''
    if hasattr(context, '__iter__'):
        context_size = len(context)
        if method in ('splice_op', 'conv1d'):
            msg = "Methods 'splice_op' and 'conv1d' do not support a context list."
            raise ValueError(msg)
        context_list = context
    else:
        context_size = context * 2 + 1
        context_list = range(-context, context + 1)
    with tf.variable_scope(name):
        if method == 'splice_layer':
            x = splice_layer(x, 'splice', context_list)
            x = linear(x,
                       'linear', [in_dim * context_size, out_dim],
                       has_bias=has_bias)
        elif method == 'splice_op':
            x = speech_ops.splice(x, context, context)
            x = linear(x,
                       'linear', [in_dim * context_size, out_dim],
                       has_bias=has_bias)
        elif method == 'conv1d':
            kernel = tf.get_variable(
                name='DW',
                shape=[context, in_dim, out_dim],
                dtype=tf.float32,
                initializer=tf.glorot_uniform_initializer())
            x = tf.nn.conv1d(x, kernel, stride=1, padding='SAME')
            if has_bias:
                b = tf.get_variable(name='bias',
                                    shape=[out_dim],
                                    dtype=tf.float32,
                                    initializer=tf.constant_initializer(0.0))
                x = tf.nn.bias_add(x, b)
        else:
            raise ValueError('Unsupported method: %s.' % (method))
        return x
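A usage sketch of the two context conventions (shapes are illustrative; splice_layer and linear are the helpers referenced above, assumed available in the same module):

# Input is [batch, time, in_dim].
feats = tf.placeholder(tf.float32, shape=[None, 200, 40])

# Symmetric int context: 2 means frames -2..2, i.e. 5 frames per step.
h1 = tdnn(feats, 'tdnn1', in_dim=40, context=2, out_dim=256,
          method='splice_layer')

# Explicit, possibly gapped, context list; only 'splice_layer' supports it.
h2 = tdnn(feats, 'tdnn2', in_dim=40, context=(-2, 0, 2), out_dim=256,
          method='splice_layer')

# Caveat: with method='conv1d' the int context is used directly as the kernel
# width (see the [context, in_dim, out_dim] kernel shape above), not as a
# symmetric half-window.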
Example #5
def __init__(self, max_len, embedding_dim, **kwargs):
    self.max_len = max_len
    self.embedding_dim = embedding_dim
    self.pos_embedding_matrix = self.get_pos_embedding_matrix(
        self.max_len, self.embedding_dim)
    embed_initializer = tf.constant_initializer(self.pos_embedding_matrix)
    self.pos_embedding_layer = tf.keras.layers.Embedding(
        *self.pos_embedding_matrix.shape,
        trainable=False,
        embeddings_initializer=embed_initializer)
    self.get_pos_layer = tf.keras.layers.Lambda(self.get_pos)
    self.mask_layer = tf.keras.layers.Lambda(self.mask_outputs)
    super().__init__(**kwargs)
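get_pos_embedding_matrix is not part of this excerpt; a common implementation is the fixed sinusoidal table from the Transformer paper, sketched here as a plain function (an assumption, not the excerpt's code):

import numpy as np

def get_pos_embedding_matrix(max_len, embedding_dim):
    # Sinusoidal table: even columns get sine, odd columns get cosine.
    pos = np.arange(max_len)[:, np.newaxis]        # [max_len, 1]
    i = np.arange(embedding_dim)[np.newaxis, :]    # [1, embedding_dim]
    angle = pos / np.power(10000.0, (2 * (i // 2)) / embedding_dim)
    table = np.zeros((max_len, embedding_dim), dtype=np.float32)
    table[:, 0::2] = np.sin(angle[:, 0::2])
    table[:, 1::2] = np.cos(angle[:, 1::2])
    return table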
Example #6
  def __init__(self, config, **kwargs):
    super().__init__(**kwargs)
    logging.info("Initialize MatchRnn...")

    self.use_pretrained_embedding = config['model']['use_pre_train_emb']
    if self.use_pretrained_embedding:
      self.embedding_path = config['model']['embedding_path']
      logging.info("Loading embedding file from: {}".format(
          self.embedding_path))
      self._word_embedding_init = pickle.load(open(self.embedding_path, 'rb'))
      self.embed_initializer = tf.constant_initializer(
          self._word_embedding_init)
    else:
      self.embed_initializer = tf.random_uniform_initializer(-0.1, 0.1)
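The embedding file is handed straight to tf.constant_initializer after unpickling, so it is presumably a float array of shape [vocab_size, embedding_size]; a sketch of writing a compatible file (the shape convention is an assumption):

import pickle
import numpy as np

vocab_size, embedding_size = 30000, 200  # illustrative sizes
emb = np.random.uniform(-0.1, 0.1,
                        (vocab_size, embedding_size)).astype(np.float32)
with open('emb.pkl', 'wb') as f:  # hypothetical path for 'embedding_path'
    pickle.dump(emb, f)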
Example #7
  def __init__(self, config, **kwargs):
    super().__init__(config, **kwargs)
    model_config = config['model']['net']['structure']
    self.dropout_rate = model_config['dropout_rate']

    self.sequence_length = config['data']['task']['max_seq_len']
    self.vocab_size = config['data']['vocab_size']
    self.num_classes = config['data']['task']['classes']['num_classes']

    self.embedding_size = model_config['embedding_size']
    self.num_units = model_config['num_units']
    self.num_layers = model_config['num_layers']
    self.filter_sizes = model_config['filter_sizes']
    self.num_filters = model_config['num_filters']

    self.l2_reg_lambda = model_config['l2_reg_lambda']

    self.embed = tf.keras.layers.Embedding(
        self.vocab_size,
        self.embedding_size,
        embeddings_initializer=self.embed_initializer)

    self.conv2ds = []
    self.pools = []
    for i, filter_size in enumerate(self.filter_sizes):
      conv2d = tf.keras.layers.Conv2D(
          filters=self.num_filters,
          kernel_size=(filter_size, self.embedding_size),
          kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
          bias_initializer=tf.constant_initializer(value=0.0),
          padding='valid',
          name='conv_{}'.format(i))
      pool = tf.keras.layers.MaxPool2D(
          pool_size=(self.sequence_length - filter_size + 1, 1),
          strides=(1, 1),
          padding='valid',
          name='pool_{}'.format(i))
      self.conv2ds.append(conv2d)
      self.pools.append(pool)

    self.flat = tf.keras.layers.Flatten()

    self.dense = tf.keras.layers.Dense(64, activation=tf.keras.activations.relu)

    self.dropout = tf.keras.layers.Dropout(rate=self.dropout_rate)

    self.final_dense = tf.keras.layers.Dense(
        self.num_classes, activation=tf.keras.activations.linear)
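The matching call() is not shown in this excerpt; a typical TextCNN wiring of the layers built above would look like the following sketch (an assumption about usage, not the author's forward pass):

def call(self, inputs, training=None):
    # inputs: [batch, sequence_length] token ids.
    x = self.embed(inputs)              # [batch, seq_len, embedding_size]
    x = tf.expand_dims(x, -1)           # add a channel axis for Conv2D
    pooled = []
    for conv2d, pool in zip(self.conv2ds, self.pools):
        h = conv2d(x)                   # [batch, seq_len - k + 1, 1, num_filters]
        pooled.append(pool(h))          # [batch, 1, 1, num_filters]
    x = self.flat(tf.concat(pooled, axis=-1))
    x = self.dense(x)
    x = self.dropout(x, training=training)
    return self.final_dense(x)          # [batch, num_classes] logits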
Example #8
    def logits_layer(self, x, labels):
        '''Logits layer; the softmax is applied downstream.'''
        if labels is None:
            # serving export mode, no need for logits
            return x

        output_num = self.taskconf['classes']['num']
        logits_type = self.netconf['logits_type']
        logits_shape = [x.shape[-1].value, output_num]

        with tf.variable_scope('logits'):
            init_type = self.netconf['logits_weight_init']['type']
            if init_type == 'truncated_normal':
                stddev = self.netconf['logits_weight_init']['stddev']
                init = tf.truncated_normal_initializer(stddev=stddev)
            elif init_type == 'xavier_uniform':
                init = tf.contrib.layers.xavier_initializer(uniform=True)
            elif init_type == 'xavier_norm':
                init = tf.contrib.layers.xavier_initializer(uniform=False)
            else:
                raise ValueError('Unsupported weight init type: %s' %
                                 (init_type))

            weights = tf.get_variable(name='weights',
                                      shape=logits_shape,
                                      initializer=init)

            if logits_type == 'linear':
                bias = tf.get_variable(
                    name='bias',
                    shape=logits_shape[1],
                    initializer=tf.constant_initializer(0.0))
                return tf.matmul(x, weights) + bias
            elif logits_type == 'linear_no_bias':
                return tf.matmul(x, weights)
            elif logits_type == 'arcface':
                return self.arcface_layer(x, labels, output_num, weights)
            else:
                raise ValueError('Unsupported logits type: %s' % (logits_type))
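For reference, the config fragment this method consults (values are illustrative):

netconf = {
    'logits_type': 'linear',  # or 'linear_no_bias' / 'arcface'
    'logits_weight_init': {
        'type': 'truncated_normal',  # or 'xavier_uniform' / 'xavier_norm'
        'stddev': 0.02,              # only read for 'truncated_normal'
    },
}
taskconf = {'classes': {'num': 10}}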
Example #9
    def __init__(self, config, **kwargs):
        super().__init__(**kwargs)
        logging.info("Initialize S2SModel")
        data_config = config['data']
        model_config = config['model']['net']['structure']
        self.use_label_vocab = data_config['task']['use_label_vocab']
        self.label_vocab_size = data_config['label_vocab_size']
        self.vocab_size = config['data']['vocab_size']
        self.use_pretrained_embedding = config['model']['use_pre_train_emb']
        self.embedding_size = model_config['embedding_size']
        if self.use_pretrained_embedding:
            self.embedding_path = config['model']['embedding_path']
            logging.info("Loading embedding file from: {}".format(
                self.embedding_path))
            self._word_embedding_init = pickle.load(
                open(self.embedding_path, 'rb'))
            self.embed_initializer = tf.constant_initializer(
                self._word_embedding_init)
        else:
            self.embed_initializer = tf.random_uniform_initializer(-0.1, 0.1)

        self.embed = tf.keras.layers.Embedding(
            self.vocab_size,
            self.embedding_size,
            embeddings_initializer=self.embed_initializer)
        self.share_embedding = model_config['share_embedding']
        if self.use_label_vocab:
            self.decode_vocab_size = self.label_vocab_size
        else:
            self.decode_vocab_size = self.vocab_size
        if self.share_embedding:
            self.decoder_embed = self.embed
        else:
            self.decoder_embed = tf.keras.layers.Embedding(
                self.decode_vocab_size,
                self.embedding_size,
                embeddings_initializer=self.embed_initializer)