コード例 #1
0
    def __init__(self, config, emb_layer, vocab_size, **kwargs):
        """Build the decoder stack from the model configuration.

        Args:
          config: nested configuration mapping. Hyper-parameters are read from
            config['model']['net']['structure'] and the inference flag from
            config['model']['is_infer'].
          emb_layer: object stored as self.embed (presumably a Keras embedding
            layer shared with the caller -- verify against the caller).
          vocab_size: output vocabulary size; sets the width of the final
            per-time-step projection.
          **kwargs: forwarded unchanged to the parent constructor.
        """
        # Run the parent constructor before any attribute assignment. Keras
        # base classes set up their attribute / sub-layer tracking there, so
        # layers assigned to `self` beforehand may not be tracked.
        # NOTE(review): assumes the parent is a tf.keras Layer/Model -- confirm.
        super().__init__(**kwargs)

        model_config = config['model']['net']['structure']
        self.is_infer = config['model']['is_infer']
        if self.is_infer:
            # Only read (and only required in the config) in inference mode.
            self.length_penalty = model_config['length_penalty']
        self.dropout_rate = model_config['dropout_rate']
        self.num_layers = model_config['num_layers']
        self.l2_reg_lambda = model_config['l2_reg_lambda']
        self.embedding_size = model_config['embedding_size']
        self.max_enc_len = model_config['max_enc_len']
        self.max_dec_len = model_config['max_dec_len']
        self.share_embedding = model_config['share_embedding']
        self.padding_token = 0
        self.beam_size = model_config['beam_size']

        # Maps token ids to an int32 mask: 1 for real tokens, 0 for padding.
        self.mask_layer = tf.keras.layers.Lambda(lambda inputs: tf.cast(
            tf.not_equal(inputs, self.padding_token), tf.int32))

        self.embed = emb_layer
        self.vocab_size = vocab_size
        self.embed_d = tf.keras.layers.Dropout(self.dropout_rate)

        # NOTE(review): the position table is sized with max_enc_len although
        # max_dec_len is also configured -- confirm this is intended.
        self.pos_embed = PositionEmbedding(self.max_enc_len,
                                           self.embedding_size)

        self.transformer_decoders = [
            TransformerDecoderLayer(config) for _ in range(self.num_layers)
        ]

        # Projects every time step onto the vocabulary.
        self.final_dense = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(self.vocab_size, name="final_dense"))
コード例 #2
0
    def __init__(self, config, **kwargs):
        """Set up the encoder/decoder transformer model from `config`.

        Args:
          config: nested configuration mapping. Hyper-parameters are read from
            config['model']['net']['structure'] and the inference flag from
            config['model']['is_infer'].
          **kwargs: forwarded, together with `config`, to the parent
            constructor.

        Note:
          self.embedding_size, self.embed and self.decode_vocab_size are read
          here but not assigned in this method; they are expected to be
          provided by the parent constructor.
        """
        super().__init__(config, **kwargs)
        # Use the same `logging` module as the messages at the end of this
        # method; `tf.logging` is a TF1-only endpoint.
        logging.info("Initialize TransformerModel...")
        model_config = config['model']['net']['structure']
        self.is_infer = config['model']['is_infer']
        if self.is_infer:
            # Only read (and only required in the config) in inference mode.
            self.length_penalty = model_config['length_penalty']
        self.dropout_rate = model_config['dropout_rate']
        self.num_layers = model_config['num_layers']
        self.l2_reg_lambda = model_config['l2_reg_lambda']
        self.max_enc_len = model_config['max_enc_len']
        self.max_dec_len = model_config['max_dec_len']
        self.share_embedding = model_config['share_embedding']
        self.padding_token = utils.PAD_IDX
        self.beam_size = model_config['beam_size']

        # Maps token ids to an int32 mask: 1 for real tokens, 0 for padding.
        self.mask_layer = tf.keras.layers.Lambda(lambda inputs: tf.cast(
            tf.not_equal(inputs, self.padding_token), tf.int32))

        self.embed_d = tf.keras.layers.Dropout(self.dropout_rate)

        self.pos_embed = layers.PositionEmbedding(self.max_enc_len,
                                                  self.embedding_size)

        self.encoder = layers.TransformerEncoder(config)
        self.decoder = layers.TransformerDecoder(config, self.embed,
                                                 self.decode_vocab_size)
        logging.info("decode_vocab_size: {}".format(self.decode_vocab_size))
        logging.info("Initialize TransformerModel done.")
コード例 #3
0
ファイル: utils.py プロジェクト: youisbaby/delta
def compute_sen_lens(inputs, padding_token=0):
    """Count the non-padding tokens along the last axis.

    Args:
      inputs: tensor of token ids shaped [..., time_steps].
      padding_token: id that marks padding positions; defaults to 0.

    Returns:
      An int32 tensor shaped [...] holding the number of non-padding tokens
      per sequence, clamped so that it is never smaller than 1.
    """
    is_token = tf.cast(tf.not_equal(inputs, padding_token), tf.int32)
    sen_lens = tf.reduce_sum(is_token, axis=-1)
    # An all-padding sequence would have length 0; report 1 instead. The clamp
    # is against the literal length 0, not against a token id such as PAD_IDX,
    # so it stays correct if the padding id is ever non-zero.
    return tf.maximum(sen_lens, 1)
コード例 #4
0
ファイル: text_seq2seq_model.py プロジェクト: zhjou/delta
 def __init__(self, config, **kwargs):
     """Create the RNN encoder/decoder pair and its helper layers.

     Args:
       config: nested configuration mapping; 'is_infer' is read from
         config['model'] and 'dropout_rate' from
         config['model']['net']['structure'].
       **kwargs: forwarded, together with `config`, to the parent
         constructor.

     Note:
       self.decoder_embed and self.decode_vocab_size are read here but not
       assigned in this method; they are expected to be provided by the
       parent constructor.
     """
     super().__init__(config, **kwargs)
     logging.info("Initialize RnnSeq2SeqModel...")

     model_section = config['model']
     net_structure = model_section['net']['structure']
     self.is_infer = model_section['is_infer']
     self.dropout_rate = net_structure['dropout_rate']
     self.padding_token = utils.PAD_IDX

     self.embed_d = tf.keras.layers.Dropout(rate=self.dropout_rate)
     self.encoder = layers.RnnEncoder(config, name="encoder")
     self.decoder = layers.RnnDecoder(
         config, self.decoder_embed, self.decode_vocab_size, name="decoder")

     # Maps token ids to an int32 mask: 1 for real tokens, 0 for padding.
     self.mask_layer = tf.keras.layers.Lambda(
         lambda inputs: tf.cast(
             tf.not_equal(inputs, self.padding_token), tf.int32))
コード例 #5
0
ファイル: ctc_utils.py プロジェクト: lizhanyang505/delta-1
def transform_preprocess(labels=None, blank_index=None, num_class=None):
  ''' Validate blank_index and convert dense labels to a SparseTensor.

  Args:
    labels: None, a tf.SparseTensor, or a dense label tensor. Entries equal
      to 0 in a dense tensor are left out of the sparse result.
    blank_index: index of the blank label; must not be None or negative.
    num_class: optional number of classes; when given, blank_index must not
      exceed num_class - 1.

  Returns:
    None when labels is None, labels itself when it already is a
    tf.SparseTensor, otherwise an int32 tf.SparseTensor built from the
    non-zero entries of labels.

  Raises:
    ValueError: if blank_index is None, negative, or larger than
      num_class - 1.
  '''
  if blank_index is None or blank_index < 0:
    raise ValueError('blank_index must be greater than or equal to zero')

  if num_class is not None and blank_index > num_class - 1:
    raise ValueError('blank_index must be less than or equal to num_class - 1')

  # Nothing to convert: no labels at all, or labels that are already sparse.
  if labels is None or isinstance(labels, tf.SparseTensor):
    return labels

  dense = tf.cast(labels, tf.int32)
  kept_idx = tf.where(tf.not_equal(dense, 0))
  return tf.SparseTensor(
      indices=kept_idx,
      values=tf.gather_nd(dense, kept_idx),
      dense_shape=tf.cast(tf.shape(dense), dtype=tf.int64))
コード例 #6
0
  def __init__(self, config, **kwargs):
    """Create the layers of the transformer-based classification model.

    Args:
      config: nested configuration mapping. The vocabulary size and number of
        classes are read from config['data']; the network hyper-parameters
        from config['model']['net']['structure'].
      **kwargs: forwarded, together with `config`, to the parent constructor.

    Note:
      self.embed_initializer is read here but not assigned in this method; it
      is expected to be provided by the parent constructor.
    """
    super().__init__(config, **kwargs)
    logging.info("Initialize TransformerModel...")

    data_cfg = config['data']
    self.vocab_size = data_cfg['vocab_size']
    self.num_classes = data_cfg['task']['classes']['num_classes']

    structure = config['model']['net']['structure']
    # Every hyper-parameter is stored under an attribute named after its key.
    for key in ('dropout_rate', 'embedding_size', 'num_layers',
                'l2_reg_lambda', 'max_len', 'transformer_dropout',
                'residual_conn', 'head_num', 'hidden_dim'):
      setattr(self, key, structure[key])
    self.padding_token = utils.PAD_IDX

    # Maps token ids to an int32 mask: 1 for real tokens, 0 for padding.
    self.mask_layer = tf.keras.layers.Lambda(
        lambda inputs: tf.cast(
            tf.not_equal(inputs, self.padding_token), tf.int32))
    self.embed = tf.keras.layers.Embedding(
        input_dim=self.vocab_size,
        output_dim=self.embedding_size,
        embeddings_initializer=self.embed_initializer)

    self.pos_embed = layers.PositionEmbedding(self.max_len, self.embedding_size)

    self.embed_d = tf.keras.layers.Dropout(rate=self.dropout_rate)

    self.transformer_encoder = layers.TransformerEncoder(config)

    self.pool = tf.keras.layers.GlobalMaxPooling1D()

    # Linear activation: the layer outputs un-normalised class scores.
    self.final_dense = tf.keras.layers.Dense(
        units=self.num_classes,
        activation=tf.keras.activations.linear,
        name="final_dense")
    logging.info("Initialize TransformerModel done.")
コード例 #7
0
def compute_mel_filterbank_features(waveforms,
                                    sample_rate=16000,
                                    dither=1.0 / np.iinfo(np.int16).max,
                                    preemphasis=0.97,
                                    frame_length=25,
                                    frame_step=10,
                                    fft_length=None,
                                    window_fn=functools.partial(
                                        tf.signal.hann_window, periodic=True),
                                    lower_edge_hertz=80.0,
                                    upper_edge_hertz=7600.0,
                                    num_mel_bins=80,
                                    log_noise_floor=1e-3,
                                    apply_mask=True):
    """Extract log mel-filterbank features from waveforms using tf ops.

    Args:
      waveforms: float32 tensor with shape [batch_size, max_len]
      sample_rate: sampling rate of the waveform
      dither: stddev of Gaussian noise added to waveform to prevent
        quantization artefacts; no noise is added when it is <= 0
      preemphasis: waveform high-pass filtering constant; the filter is
        skipped when it is <= 0
      frame_length: frame length in ms
      frame_step: frame step in ms
      fft_length: number of fft bins; defaults to the smallest power of two
        that is >= the frame length in samples
      window_fn: windowing function
      lower_edge_hertz: lowest frequency of the filterbank
      upper_edge_hertz: highest frequency of the filterbank
      num_mel_bins: filterbank size
      log_noise_floor: lower bound applied to the mel energies before taking
        the log, so that the log never sees zero
      apply_mask: When working on a batch of samples, set padding frames to
        zero

    Returns:
      filterbanks: a float32 tensor with shape [batch_size, len, num_bins, 1]
    """
    # Find the wave length: the largest index for which the value is != 0.
    # Note that waveform samples that are exactly 0.0 are quite common, so
    # simply doing sum(waveforms != 0, axis=-1) will not work correctly.
    sample_index = tf.expand_dims(tf.range(tf.shape(waveforms)[1]), 0)
    wav_lens = tf.reduce_max(
        sample_index * tf.cast(tf.not_equal(waveforms, 0.0), tf.int32),
        axis=-1) + 1
    if dither > 0:
        waveforms += tf.random.normal(tf.shape(waveforms), stddev=dither)
    if preemphasis > 0:
        waveforms = waveforms[:, 1:] - preemphasis * waveforms[:, :-1]
        # The difference filter shortens every signal by one sample.
        wav_lens -= 1

    # Convert the frame parameters from milliseconds to samples.
    frame_length = int(frame_length * sample_rate / 1e3)
    frame_step = int(frame_step * sample_rate / 1e3)
    if fft_length is None:
        fft_length = int(2**(np.ceil(np.log2(frame_length))))

    # `stfts` is a complex64 Tensor representing the short-time Fourier
    # Transform of each signal in `waveforms`. Its shape is
    # [batch_size, ?, fft_unique_bins]
    # where fft_unique_bins = fft_length // 2 + 1
    stfts = tf.signal.stft(waveforms,
                           frame_length=frame_length,
                           frame_step=frame_step,
                           fft_length=fft_length,
                           window_fn=window_fn,
                           pad_end=True)

    # Number of frames that start inside the un-padded part of each signal.
    stft_lens = (wav_lens + (frame_step - 1)) // frame_step
    # NOTE(review): `less_equal` keeps frame indices 0..stft_lens inclusive,
    # i.e. one frame more than stft_lens -- confirm whether `less` was meant.
    # Left unchanged so that the produced features stay the same.
    masks = tf.cast(
        tf.less_equal(tf.expand_dims(tf.range(tf.shape(stfts)[1]), 0),
                      tf.expand_dims(stft_lens, 1)), tf.float32)

    # An energy spectrogram is the magnitude of the complex-valued STFT.
    # A float32 Tensor of shape [batch_size, ?, fft_length // 2 + 1].
    magnitude_spectrograms = tf.abs(stfts)

    # Warp the linear-scale, magnitude spectrograms into the mel-scale.
    # `as_list()` yields a plain int under both TF 1.x and TF 2.x shapes.
    num_spectrogram_bins = magnitude_spectrograms.shape.as_list()[-1]
    linear_to_mel_weight_matrix = (tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz,
        upper_edge_hertz))
    mel_spectrograms = tf.tensordot(magnitude_spectrograms,
                                    linear_to_mel_weight_matrix, 1)
    # Note: Shape inference for tensordot does not currently handle this case.
    mel_spectrograms.set_shape(magnitude_spectrograms.shape[:-1].concatenate(
        linear_to_mel_weight_matrix.shape[-1:]))

    log_mel_sgram = tf.math.log(
        tf.maximum(log_noise_floor, mel_spectrograms))

    if apply_mask:
        # `masks` is already float32; broadcast it over the mel-bin axis.
        log_mel_sgram *= tf.expand_dims(masks, -1)

    return tf.expand_dims(log_mel_sgram, -1, name="mel_sgrams")
コード例 #8
0
ファイル: text_seq2seq_task.py プロジェクト: youisbaby/delta
 def exclude_padding(self, batch):
     """Return the leading slice of `batch` whose length is its non-pad count.

     The number of entries different from utils.PAD_IDX is summed over the
     last axis and used as the end of a slice on the first axis.

     NOTE(review): the slice bound must be a scalar, so this presumably
     expects a 1-D `batch` with padding only at the end -- verify against the
     callers.
     """
     not_pad = tf.not_equal(batch, utils.PAD_IDX)
     token_count = tf.reduce_sum(tf.cast(not_pad, tf.int32), axis=-1)
     return batch[:token_count]