Code example #1
    def tdnn_block(self, inputs):
        ''' TDNN layers. '''
        if 'tdnn_method' in self.netconf:
            tdnn_method = self.netconf['tdnn_method']
        else:
            # splice_layer runs faster and supports discrete contexts; default for now.
            tdnn_method = 'splice_layer'
        tdnn_contexts = self.netconf['tdnn_contexts']
        logging.info("tdnn_contexts : {}".format(tdnn_contexts))
        tdnn_dims = self.netconf['tdnn_dims']
        logging.info("tdnn_dims : {}".format(tdnn_dims))

        layer_num = len(tdnn_contexts)
        assert layer_num == len(tdnn_dims)

        channels = [self.input_channels] + tdnn_dims
        logging.info("tdnn_channels : {}".format(channels))

        input_h_t = tf.shape(inputs)[1]
        input_w = inputs.shape[2]
        input_c = inputs.shape[3]
        if tdnn_method == 'conv1d':
            # NHWC -> NW'C, W' = H * W
            inputs = tf.reshape(inputs, [-1, input_h_t * input_w, input_c])
            last_w = channels[0]
        else:
            inputs = tf.reshape(inputs, [-1, input_h_t, input_w * input_c])
            last_w = input_w * input_c

        downsample_input_len = self.input_len
        with tf.variable_scope('tdnn'):
            x = tf.identity(inputs)
            for index in range(layer_num):
                unit_name = 'unit-' + str(index + 1)
                with tf.variable_scope(unit_name):
                    tdnn_name = 'tdnn-' + str(index + 1)
                    x = common_layers.tdnn(x,
                                           tdnn_name,
                                           last_w,
                                           tdnn_contexts[index],
                                           channels[index + 1],
                                           has_bias=True,
                                           method=tdnn_method)
                    last_w = channels[index + 1]
                    x = tf.nn.relu(x)
                    if self.netconf['use_bn']:
                        bn_name = 'bn' + str(index + 1)
                        x = tf.layers.batch_normalization(x,
                                                          axis=-1,
                                                          momentum=0.9,
                                                          training=self.train,
                                                          name=bn_name)
                    if self.netconf['use_dropout']:
                        x = tf.layers.dropout(x,
                                              self.netconf['dropout_rate'],
                                              training=self.train)
                    # The TDNN layers here keep the frame rate, so the input
                    # length passes through unchanged.
                    downsample_input_len = downsample_input_len

        return x, downsample_input_len
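For reference, a hypothetical `netconf` fragment that would exercise this block. The keys are exactly the ones read above; the values, and the per-layer context-list format, are illustrative assumptions rather than the project's shipped configuration:

    # Illustrative config (assumption): one context list and one output dim per layer.
    netconf = {
        'tdnn_method': 'splice_layer',  # or 'conv1d'
        'tdnn_contexts': [[-2, -1, 0, 1, 2], [-1, 0, 1]],
        'tdnn_dims': [512, 512],
        'use_bn': True,
        'use_dropout': True,
        'dropout_rate': 0.1,
    }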
Code example #2
def attention(inputs, attention_size, time_major=False, return_alphas=False):
    """Attention layer."""
    if isinstance(inputs, tuple):
        # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
        inputs = tf.concat(inputs, 2)

    if time_major:
        # (T,B,D) => (B,T,D)
        inputs = tf.transpose(inputs, [1, 0, 2])

    time_size = inputs.shape[1].value  # T value - time size of the RNN layer
    hidden_size = inputs.shape[2].value  # D value - hidden size of the RNN layer

    # Trainable parameters
    W_omega = tf.get_variable(name='W_omega',
                              initializer=tf.random_normal(
                                  [hidden_size, attention_size], stddev=0.1))
    b_omega = tf.get_variable(name='b_omega',
                              initializer=tf.random_normal([attention_size],
                                                           stddev=0.1))
    u_omega = tf.get_variable(name='u_omega',
                              initializer=tf.random_normal([attention_size, 1],
                                                           stddev=0.1))

    # Applying fully connected layer with non-linear activation to each of the B*T timestamps;
    #  the shape of `v` is (B,T,D)*(D,A)=(B,T,A), where A=attention_size
    #v = tf.tanh(tf.tensordot(inputs, W_omega, axes=1) + b_omega)
    #v = tf.sigmoid(tf.tensordot(inputs, W_omega, axes=1) + b_omega)
    # (B, T, D) dot (D, Atten)

    logging.info('attention inputs: {}'.format(inputs.shape))
    inputs_reshaped = tf.reshape(inputs, [-1, hidden_size])
    dot = tf.matmul(inputs_reshaped, W_omega)
    dot = tf.reshape(dot, [-1, time_size, attention_size])
    v = tf.sigmoid(dot + b_omega)
    logging.info(f'attention vector: {v.shape}')
    # For each of the timestamps its vector of size A from `v` is reduced with `u` vector
    # (B, T, Atten) dot (Atten)
    #vu = tf.tensordot(v, u_omega, axes=1)   # (B,T) shape
    v = tf.reshape(v, [-1, attention_size])  # (B*T, A)
    vu = tf.matmul(v, u_omega)  # (B*T, 1)
    vu = tf.squeeze(vu, axis=-1)  # (B*T,)
    vu = tf.reshape(vu, [-1, time_size])  # (B, T)
    logging.info(f'attention energy: {vu.shape}')
    alphas = tf.nn.softmax(vu)  # (B,T) shape also

    # Output of (Bi-)RNN is reduced with attention vector; the result has (B,D) shape
    # [batch, time] -> [batch, time, 1]
    alphas = tf.expand_dims(alphas, -1)
    # [batch, time, dim] -> [batch, dim]
    output = tf.reduce_sum(inputs * alphas, 1)

    if not return_alphas:
        return output

    return output, alphas
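A minimal usage sketch (TF1 graph mode). `rnn_outputs` is a hypothetical placeholder standing in for the (batch, time, hidden) output of an RNN; the sizes are illustrative:

    import tensorflow as tf

    rnn_outputs = tf.placeholder(tf.float32, [None, 50, 128])
    with tf.variable_scope('attn'):
        # context: (batch, 128); alphas: (batch, 50, 1)
        context, alphas = attention(rnn_outputs, attention_size=64,
                                    return_alphas=True)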
Code example #3
        def grow_topk(i, alive_seq, alive_log_probs, states):
            """Inner beam search loop."""

            flat_ids = tf.reshape(alive_seq, [batch_size * beam_size, -1])

            # (batch_size * beam_size, decoded_length)
            if states:
                flat_states = nest.map_structure(_merge_beam_dim, states)
                flat_logits, flat_states = symbols_to_logits_fn(
                    flat_ids, i, flat_states)
                states = nest.map_structure(
                    lambda t: _unmerge_beam_dim(t, batch_size, beam_size),
                    flat_states)
            else:
                flat_logits = symbols_to_logits_fn(flat_ids)

            logits = tf.reshape(flat_logits, [batch_size, beam_size, -1])

            candidate_log_probs = log_prob_from_logits(logits)

            log_probs = candidate_log_probs + tf.expand_dims(alive_log_probs,
                                                             axis=2)

            # GNMT-style length penalty: ((5 + step) / 6) ** alpha.
            length_penalty = tf.pow(((5. + tf.to_float(i + 1)) / 6.), alpha)

            curr_scores = log_probs / length_penalty
            flat_curr_scores = tf.reshape(curr_scores,
                                          [-1, beam_size * vocab_size])

            # Take 2 * beam_size candidates so that enough non-EOS beams
            # survive the subsequent pruning of finished hypotheses.
            topk_scores, topk_ids = tf.nn.top_k(flat_curr_scores,
                                                k=beam_size * 2)

            topk_log_probs = topk_scores * length_penalty

            topk_beam_index = topk_ids // vocab_size
            topk_ids %= vocab_size  # Unflatten the ids
            batch_pos = compute_batch_indices(batch_size, beam_size * 2)
            topk_coordinates = tf.stack([batch_pos, topk_beam_index], axis=2)

            topk_seq = tf.gather_nd(alive_seq, topk_coordinates)
            if states:
                states = nest.map_structure(
                    lambda state: tf.gather_nd(state, topk_coordinates),
                    states)
            topk_seq = tf.concat(
                [topk_seq, tf.expand_dims(topk_ids, axis=2)], axis=2)

            topk_finished = tf.equal(topk_ids, eos_id)

            return topk_seq, topk_log_probs, topk_scores, topk_finished, states
Code example #4
  def call(self, inputs, training=None, mask=None):
    input_x = inputs["input_x"]
    if self.use_dense_task:
      dense_input = inputs["input_dense"]

    # [batch_size, max_len, embed_len]
    out = self.embed(input_x)
    if self.use_pretrained_model:
      logging.info("use_pretrained_model: {}, {}".format(
          self.pretrained_model_name, self.pretrained_model_mode))
      if self.pretrained_model_name == 'elmo':
        input_px = self.get_pre_train_graph(input_x)
        input_px = tf.reshape(input_px,
                              [-1, self.max_len, self.pretrained_model_dim])
        out = tf.concat([out, input_px], axis=-1)
        out = tf.reduce_max(out, axis=1)
      if self.pretrained_model_name == 'bert':
        out = self.get_pre_train_graph(input_x)
    else:
      out = tf.reduce_max(out, axis=1)
    out = self.embed_d(out, training=training)
    if self.use_dense_input:
      # `dense_input` is only defined above when use_dense_task is set.
      dense_out = self.dense_input_linear(dense_input)
      if self.only_dense_input:
        out = dense_out
      else:
        out = tf.keras.layers.Concatenate()([out, dense_out])
    # [batch_size, class_num]
    scores = self.final_dense(out)
    return scores
Code example #5
 def grad_variance(self):
     grad_var_ops = []
     tensor_to_avg = []
     for t, g in zip(self._tvars, self._grads):
         if isinstance(g, ops.IndexedSlices):
             tensor_to_avg.append(
                 tf.reshape(tf.unsorted_segment_sum(g.values, g.indices,
                                                    g.dense_shape[0]),
                            shape=t.get_shape()))
         else:
             tensor_to_avg.append(g)
     avg_op = self._moving_averager.apply(tensor_to_avg)
     grad_var_ops.append(avg_op)
     with tf.control_dependencies([avg_op]):
         self._grad_avg = [
             self._moving_averager.average(val) for val in tensor_to_avg
         ]
         self._grad_avg_squared = [tf.square(val) for val in self._grad_avg]
     # Gradient variance estimate: E[||g||^2] - ||E[g]||^2, floored at EPS.
     self._grad_var = tf.maximum(
         tf.constant(EPS, dtype=self._grad_norm_squared_avg.dtype),
         self._grad_norm_squared_avg -
         tf.add_n([tf.reduce_sum(val) for val in self._grad_avg_squared]))
     if self._sparsity_debias:
         self._grad_var *= self._sparsity_avg
     return grad_var_ops
Code example #6
    def tdnn(self, features, n_class, is_train):
        '''
        inp: (batch_size, window_len, feat_dim)
        '''
        inp = features['inputs']
        kernel_size = self.cfg['model']['net']['kernel_size']
        strides = self.cfg['model']['net']['strides']
        num_layers = self.cfg['model']['net']['num_layers']
        filters_num = inp.get_shape()[-1]

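        # Note: tf.layers.conv1d defaults to padding='valid', so the time
        # dimension shrinks at every layer.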
        for i in range(num_layers):
            output = tf.nn.relu(
                tf.layers.conv1d(inp,
                                 filters_num,
                                 kernel_size,
                                 strides=strides))
            output = tf.layers.batch_normalization(output,
                                                   training=is_train,
                                                   name='bn%d' % i)
            inp = output

        dim = output.get_shape()[1] * output.get_shape()[2]
        output = tf.reshape(output, [-1, dim])

        logits = tf.layers.dense(output, n_class)
        return logits
Code example #7
def extract_feature(waveforms, params):
    '''extract fbank with delta-delta and do cmvn
    waveforms: [batch, samples]
    '''
    p = params
    with tf.variable_scope('feature_extractor'):
        mel_fbanks = extract_logfbank_with_delta(waveforms, params)
        # shape: [1, nframes, nbins, nchannels]
        fbank_size = utils.shape_list(mel_fbanks)
        #assert fbank_size[0] == 1

        # This replaces CMVN estimation on data
        if not p.audio_global_cmvn:
            mean = tf.reduce_mean(mel_fbanks, keepdims=True, axis=1)
            variance = tf.reduce_mean(tf.square(mel_fbanks - mean),
                                      keepdims=True,
                                      axis=1)
        else:
            assert p.audio_cmvn_path, p.audio_cmvn_path
            mean, variance = utils.load_cmvn(p.audio_cmvn_path)

        var_epsilon = 1e-09
        mel_fbanks = utils.apply_cmvn(mel_fbanks, mean, variance, var_epsilon)

        # Later models like to flatten the two spatial dims. Instead, we add a
        # unit spatial dim and flatten the frequencies and channels.
        batch_size = fbank_size[0]
        feats = tf.concat([
            tf.reshape(
                mel_fbanks,
                [batch_size, fbank_size[1], fbank_size[2], fbank_size[3]]),
            tf.zeros((batch_size, p.num_zeropad_frames, fbank_size[2],
                      fbank_size[3]))
        ], 1)
    return feats  # shape [batch_size, nframes, feature_size, channels]
Code example #8
File: text_match_model.py Project: zhjou/delta
    def call(self, inputs, training=None, mask=None):  # pylint: disable=too-many-locals

        input_left = inputs["input_x_left"]
        input_right = inputs["input_x_right"]

        embedding = self.embed
        embed_left = embedding(input_left)
        embed_right = embedding(input_right)

        encoded_left = self.lstm_left(embed_left)
        encoded_right = self.lstm_right(embed_right)

        encoded_right = tf.transpose(encoded_right, [0, 2, 1])
        left_right_sim = tf.matmul(encoded_left, encoded_right)
        shape_list = left_right_sim.get_shape()
        newdim = shape_list[1] * shape_list[2]
        sim_matrix = tf.reshape(left_right_sim, [-1, newdim],
                                name="sim_matrix")

        dropout = self.dropout(sim_matrix)
        out = self.outlayer(dropout)

        scores = self.final_dense(out)

        return scores
Code example #9
    def model(self, feats, labels):
        ''' Build the model. '''
        x = self.resnet(feats)

        with tf.variable_scope("avg_pooling"):
            batch_t = tf.shape(x)[0]
            time_t = tf.shape(x)[1]
            feat, channel = x.shape.as_list()[2:]
            x = tf.reshape(x, [batch_t, time_t, feat * channel])
            x = self.pooling_layer(x, pooling_type='average')

        with tf.variable_scope("output_layer"):
            shape = x.shape.as_list()[-1]
            hidden_dims = self.params().embedding_size
            y = x
            y = common_layers.linear(y,
                                     'dense-matmul', [shape, hidden_dims],
                                     has_bias=True)
            y = tf.layers.batch_normalization(y,
                                              axis=-1,
                                              momentum=0.99,
                                              training=self.train,
                                              name='dense-bn')
            embedding = y
            dense_output = y

        logits = self.logits_layer(dense_output, labels)
        model_outputs = {'logits': logits, 'embeddings': embedding}
        return model_outputs
Code example #10
File: mfcc.py Project: lizhanyang505/delta-1
    def call(self, audio_data, sample_rate=None):
        """
    Caculate mfcc features of audio data.
    :param audio_data: the audio signal from which to compute spectrum.
                       Should be an (1, N) tensor.
    :param sample_rate: the samplerate of the signal we working with.
    :return: A float tensor of size (num_channels, num_frames, num_frequencies)
            containing mfcc features of every frame in speech.
    """
        p = self.config
        with tf.name_scope('mfcc'):

            if sample_rate is None:
                sample_rate = tf.constant(p.sample_rate, dtype=tf.int32)

            assert_op = tf.assert_equal(tf.constant(p.sample_rate),
                                        tf.cast(sample_rate, dtype=tf.int32))
            with tf.control_dependencies([assert_op]):

                fbank_feats = self.fbank(audio_data, sample_rate)
                sample_rate = tf.cast(sample_rate, dtype=tf.int32)
                shape = tf.shape(fbank_feats)
                nframe = shape[0]
                nfbank = shape[1]
                fbank_feats = tf.reshape(fbank_feats, (1, nframe, nfbank))
                framepow_feats = self.framepow(audio_data, sample_rate)
                mfcc = py_x_ops.mfcc(fbank_feats,
                                     framepow_feats,
                                     sample_rate,
                                     use_energy=p.use_energy,
                                     cepstral_lifter=p.cepstral_lifter,
                                     coefficient_count=p.coefficient_count)
                return mfcc
Code example #11
  def call(self, inputs, training=None, mask=None):  # pylint: disable=too-many-locals
    input_x = tf.identity(inputs["input_x"], name='input_x')
    if self.use_dense_task:
      dense_input = inputs["input_dense"]
    if self.use_true_length:
      # [batch_size, max_doc_len, max_sen_len]
      input_hx = self.pad_to_hier_input_true_len(
          input_x,
          self.max_doc_len,
          self.max_sen_len,
          self.split_token,
          padding_token=self.padding_token)
    else:
      # [batch_size, max_doc_len, max_sen_len]
      input_hx = self.pad_to_hier_input(
          input_x,
          self.max_doc_len,
          self.max_sen_len,
          padding_token=self.padding_token)

    # [batch_size, max_doc_len]
    sen_lens = compute_sen_lens(input_hx, padding_token=self.padding_token)
    # [batch_size]
    doc_lens = compute_doc_lens(sen_lens)
    # [batch_size, max_doc_len, max_sen_len, 1]
    sen_mask = tf.expand_dims(
        tf.sequence_mask(sen_lens, self.max_sen_len, dtype=tf.float32), axis=-1)

    # [batch_size, max_doc_len, 1]
    doc_mask = tf.expand_dims(
        tf.sequence_mask(doc_lens, self.max_doc_len, dtype=tf.float32), axis=-1)

    # [batch_size, max_doc_len, max_sen_len, embed_len]
    out = self.embed(input_hx)
    if self.use_pretrained_model:
      input_px = self.get_pre_train_graph(input_x)
      input_px = tf.reshape(
          input_px,
          [-1, self.max_doc_len, self.max_sen_len, self.pretrained_model_dim])
      out = tf.concat([out, input_px], axis=-1)
    out = self.embed_d(out, training=training)
    all_sen_encoder = tf.keras.layers.TimeDistributed(self.sen_encoder)
    # [batch_size, max_doc_len, features]
    out = all_sen_encoder(out, training=training, mask=sen_mask)
    # [batch_size, features]
    out = self.doc_encoder(out, training=training, mask=doc_mask)

    if self.use_dense_input:
      # `dense_input` is only defined above when use_dense_task is set.
      dense_out = self.dense_input_linear(dense_input)
      if self.only_dense_input:
        out = dense_out
      else:
        out = tf.keras.layers.Concatenate()([out, dense_out])

    # [batch_size, class_num]
    scores = self.final_dense(out)

    return scores
Code example #12
 def pad_to_hier_input(inputs, max_doc_len, max_sen_len, padding_token=0):
   """
   Input shape: [batch_size, max_len]
   New Input shape: [batch_size, max_doc_len, max_sen_len]
   """
   new_len = max_sen_len * max_doc_len
   new_input = cut_or_padding(inputs, new_len, padding_token=padding_token)
   new_input = tf.reshape(new_input, [-1, max_doc_len, max_sen_len])
   return new_input
Code example #13
 def linear_block(self, x):
     '''
     linear layer for dim reduction
     x: shape [batch, time, feat, channel]
     output: shape [b, t, f]
     '''
     with tf.variable_scope('linear'):
         times, feat, channel = x.shape.as_list()[1:]
         x = tf.reshape(x, [-1, feat * channel])
         if self.netconf['use_dropout']:
             x = tf.layers.dropout(x,
                                   self.netconf['dropout_rate'],
                                   training=self.train)
         x = common_layers.linear(
             x, 'linear1', [feat * channel, self.netconf['linear_num']])
         #x = tf.nn.relu6(x)
         x = tf.reshape(x, [-1, times, self.netconf['linear_num']])
     return x
Code example #14
    def _loop_body(time, end_time, context, left_context, right_context,
                   output_tas):
        shape = tf.shape(context)
        B, _, D = shape[0], shape[1], shape[2]
        # Width of one spliced frame: (left + current + right) contexts, D dims each.
        N = (1 + left_context + right_context) * D

        new_feat = context[:, time:time + left_context + 1 + right_context, :]
        new_feat = tf.reshape(new_feat, [B, N])
        new_output_tas = output_tas.write(time, new_feat)
        return (time + 1, end_time, context, left_context, right_context,
                new_output_tas)
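A sketch of how this body might be driven, assuming it serves as the body of a tf.while_loop over frames and that `context` has already been padded by `left_context` and `right_context` frames; the wrapper below is illustrative, not the project's actual driver:

    # Illustrative driver (assumption): splice the padded `context` frame by frame.
    output_tas = tf.TensorArray(dtype=context.dtype, size=end_time)
    loop_vars = (0, end_time, context, left_context, right_context, output_tas)
    *_, output_tas = tf.while_loop(
        cond=lambda time, end_time, *rest: time < end_time,
        body=_loop_body,
        loop_vars=loop_vars)
    spliced = output_tas.stack()  # (T, B, N)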
Code example #15
    def test_maxpool(self):
        '''test maxpool'''
        inputs = tf.reshape(tf.range(25), shape=[1, 5, 5, 1])  # A 4D tensor
        ksize = [3, 3]
        strides = [1, 1]
        output = cl.max_pool(inputs, ksize, strides)
        output_shape = [1, 3, 3, 1]
        self.assertAllEqual(tf.shape(output), output_shape)

        output_true = tf.constant([[[[12], [13], [14]], [[17], [18], [19]],
                                    [[22], [23], [24]]]])
        self.assertAllEqual(output, output_true)
Code example #16
   def _reshape_mask(mask):
       """
 repeat mask for multi head
   Input shape: (Batch size, steps)
   Output shape: (Batch size * head num, steps)
 """
       if mask is None:
           return None
       seq_len = tf.shape(mask)[1]
       mask = tf.expand_dims(mask, axis=1)
       mask = tf.tile(mask, [1, self.head_num, 1])
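       # e.g. head_num=8: (B, T) -> (B, 1, T) -> (B, 8, T) -> (B * 8, T)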
       return tf.reshape(mask, shape=(-1, seq_len))
Code example #17
    def linear_block(self, x):
        '''
        linear layer for dim reduction
        x: shape [batch, time, feat, channel]
        output: shape [b, t, f]
        '''
        batch_t = tf.shape(x)[0]
        time_t = tf.shape(x)[1]
        feat, channel = x.shape.as_list()[2:]
        linear_num = self.netconf['linear_num']

        if linear_num > 0:
            with tf.variable_scope('linear'):
                x = tf.reshape(x, [batch_t * time_t, feat * channel])

                if self.netconf['use_dropout']:
                    x = tf.layers.dropout(x,
                                          self.netconf['dropout_rate'],
                                          training=self.train)

                x = common_layers.linear(x, 'linear1',
                                         [feat * channel, linear_num])

                x = tf.nn.relu(x)

                if self.netconf['use_bn']:
                    bn_name = 'bn_linear'
                    x = tf.layers.batch_normalization(x,
                                                      axis=-1,
                                                      momentum=0.9,
                                                      training=self.train,
                                                      name=bn_name)

                x = tf.reshape(x, [batch_t, time_t, linear_num])
        else:
            logging.info('linear_num <= 0, only apply reshape.')
            x = tf.reshape(x, [batch_t, time_t, feat * channel])

        return x
Code example #18
File: transformer.py Project: lizhanyang505/delta-1
    def call(self, inps, training=None, mask=None):
        if not self.is_infer:
            dec_inp, enc_out = inps
            with tf.name_scope('while'):
                dec_out = self.decode(dec_inp, enc_out, training, mask)
                scores = self.final_dense(dec_out)
                return scores
        else:
            enc_out = inps
            init_ids = tf.cast(
                tf.ones([utils.shape_list(enc_out)[0]]) * self.sos_id,
                tf.int32)
            # Beam Search
            enc_shape = utils.shape_list(enc_out)
            enc_out = tf.tile(tf.expand_dims(enc_out, axis=1),
                              [1, self.beam_size, 1, 1])
            enc_out = tf.reshape(
                enc_out,
                [enc_shape[0] * self.beam_size, enc_shape[1], enc_shape[2]])
            enc_mask = tf.tile(tf.expand_dims(mask, axis=1),
                               [1, self.beam_size, 1, 1, 1])
            enc_mask = tf.reshape(enc_mask,
                                  [enc_shape[0] * self.beam_size, 1, 1, -1])

            def symbols_to_logits_fn(dec_inps):
                dec_out = self.decode(dec_inps, enc_out, training, enc_mask)
                scores = self.final_dense(dec_out)
                return scores[:, -1, :]

            decoded_ids, scores, _ = self.beam_search(symbols_to_logits_fn,
                                                      init_ids, self.beam_size,
                                                      self.max_dec_len,
                                                      self.vocab_size,
                                                      self.length_penalty,
                                                      self.eos_id)
            # Keep the best beam and drop the initial SOS token.
            decoded_ids = decoded_ids[:, 0, 1:]

            return decoded_ids
Code example #19
File: sub_tf.py Project: zhjou/delta
    def split_heads(self, x, batch_size):
        """
    Split hidden_size into depth (hidden_size // num_heads) for
    multi-head attention.
    Args:
      x: (batch_size, seq_len_x, hidden_size)
      batch_size: scalar batch size used for the reshape

    Returns:
      split_x: (batch_size, num_heads, seq_len_x, depth)
    """
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
        split_x = tf.transpose(x, perm=[0, 2, 1, 3])
        return split_x
Code example #20
File: loss_impl.py Project: lizhanyang505/delta-1
  def call(self,
           logits=None,
           input_length=None,
           labels=None,
           label_length=None,
           **kwargs):
    assert "model" in kwargs
    model = kwargs["model"]
    tags_scores = tf.reshape(
        logits, [-1, model.max_len, model.seq_num_classes], name="scores")
    loss, _ = crf_log_likelihood(tags_scores, labels, input_length,
                                 model.transitions)

    return loss
Code example #21
 def pooling_layer(self, x, time_len):
     ''' pooling layer'''
     with tf.variable_scope('time_pooling'):
         if self.attention:
             x, self.alphas = common_layers.attention(
                 x, self.netconf['attention_size'], return_alphas=True)
             #alphas shape [batch, time, 1] -> [1, batch, time, 1]-> [1, time, batch, 1]
             tf.summary.image(
                 'alignment',
                 tf.transpose(tf.expand_dims(self.alphas, 0), [0, 2, 1, 3]))
         else:
             if self.netconf['use_lstm_layer']:
                 x = tf.concat(x, 2)
             # [batch, seq_len, dim, 1]
             x = tf.expand_dims(x, axis=-1)
             seq_len = time_len
             x = common_layers.max_pool(x,
                                        ksize=[seq_len, 1],
                                        strides=[seq_len, 1])
             if self.netconf['use_lstm_layer']:
                 x = tf.reshape(x, [-1, 2 * self.netconf['cell_num']])
             else:
                 x = tf.reshape(x, [-1, self.netconf['linear_num']])
         return x
Code example #22
File: sub_tf.py Project: zhjou/delta
    def call(self, inputs, training=None, mask=None):
        """
    The implementation of Multi-headed attention.
    Args:
      inputs = (q, k, v)
      q: (batch_size, seq_len_q, hidden_size)
      k: (batch_size, seq_len_k, hidden_size)
      v: (batch_size, seq_len_v, hidden_size)
      mask: (batch_size, seq_len_q, seq_len_k)

    Returns:
      output: (batch_size, seq_len_q, hidden_size)
      attention_weights: (batch_size, num_heads, seq_len_q, seq_len_k)
    """
        q, k, v = inputs
        batch_size = tf.shape(q)[0]

        q = self.wq(q)  # (batch_size, seq_len_q, hidden_size)
        k = self.wk(k)  # (batch_size, seq_len_k, hidden_size)
        v = self.wv(v)  # (batch_size, seq_len_v, hidden_size)

        q = self.split_heads(
            q, batch_size)  # (batch_size, num_heads, seq_len_q, depth)
        k = self.split_heads(
            k, batch_size)  # (batch_size, num_heads, seq_len_k, depth)
        v = self.split_heads(
            v, batch_size)  # (batch_size, num_heads, seq_len_v, depth)

        # scaled_attention.shape == (batch_size, num_heads, seq_len_q, depth)
        # attention_weights.shape == (batch_size, num_heads, seq_len_q, seq_len_k)
        scaled_attention, attention_weights = self.scaled_dot_product_attention(
            q, k, v, mask)

        scaled_attention = tf.transpose(
            scaled_attention,
            perm=[0, 2, 1, 3])  # (batch_size, seq_len_q, num_heads, depth)

        concat_attention = tf.reshape(
            scaled_attention,
            (batch_size, -1,
             self.hidden_size))  # (batch_size, seq_len_q, hidden_size)

        output = self.dense(
            concat_attention)  # (batch_size, seq_len_q, hidden_size)

        return output, attention_weights
Code example #23
def delta_delta(feat, order=2):
    '''
    params:
      feat: a tensor of shape [nframe, nfbank] or [nframe, nfbank, 1]
    return: [nframe, nfbank, order + 1] (3 for the default order=2)
    '''
    feat = tf.cond(tf.equal(tf.rank(feat), 3),
                   true_fn=lambda: feat[:, :, 0],
                   false_fn=lambda: feat)

    shape = tf.shape(feat)
    # py_x_ops.delta_delta returns a flat [nframe, nfbank * (order + 1)] tensor.
    nframe = shape[0]
    nfbank = shape[1]
    delta = py_x_ops.delta_delta(feat, order=order)
    feat_with_delta_delta = tf.reshape(delta, (nframe, nfbank, (order + 1)))
    return feat_with_delta_delta
Code example #24
    def test_splice_layer(self):
        '''test splice layer'''
        inputs = tf.reshape(tf.range(15), shape=[1, 5, 3])
        context = [0, 1]
        output = cl.splice_layer(inputs, 'splice', context)
        output_true = tf.constant([[[0, 1, 2, 3, 4, 5], [3, 4, 5, 6, 7, 8],
                                    [6, 7, 8, 9, 10, 11],
                                    [9, 10, 11, 12, 13, 14],
                                    [12, 13, 14, 12, 13, 14]]])
        self.assertAllEqual(output, output_true)

        context = [-1, 0, 1]
        output = cl.splice_layer(inputs, 'splice', context)
        output_true = tf.constant([[[0, 1, 2, 0, 1, 2, 3, 4, 5],
                                    [0, 1, 2, 3, 4, 5, 6, 7, 8],
                                    [3, 4, 5, 6, 7, 8, 9, 10, 11],
                                    [6, 7, 8, 9, 10, 11, 12, 13, 14],
                                    [9, 10, 11, 12, 13, 14, 12, 13, 14]]])
        self.assertAllEqual(output, output_true)

        context = [0, 1, 3]
        output = cl.splice_layer(inputs, 'splice', context)
        output_true = tf.constant([[[0, 1, 2, 3, 4, 5, 9, 10, 11],
                                    [3, 4, 5, 6, 7, 8, 12, 13, 14],
                                    [6, 7, 8, 9, 10, 11, 12, 13, 14],
                                    [9, 10, 11, 12, 13, 14, 12, 13, 14],
                                    [12, 13, 14, 12, 13, 14, 12, 13, 14]]])
        self.assertAllEqual(output, output_true)

        context = [1, 3]
        output = cl.splice_layer(inputs, 'splice', context)
        output_true = tf.constant([[[3, 4, 5, 9, 10, 11],
                                    [6, 7, 8, 12, 13, 14],
                                    [9, 10, 11, 12, 13, 14],
                                    [12, 13, 14, 12, 13, 14],
                                    [12, 13, 14, 12, 13, 14]]])
        self.assertAllEqual(output, output_true)

        context = [1, 2, 3]
        output = cl.splice_layer(inputs, 'splice', context)
        output_true = tf.constant([[[3, 4, 5, 6, 7, 8, 9, 10, 11],
                                    [6, 7, 8, 9, 10, 11, 12, 13, 14],
                                    [9, 10, 11, 12, 13, 14, 12, 13, 14],
                                    [12, 13, 14, 12, 13, 14, 12, 13, 14],
                                    [12, 13, 14, 12, 13, 14, 12, 13, 14]]])
        self.assertAllEqual(output, output_true)
Code example #25
File: delta_delta.py Project: xiaming9880/delta
    def call(self, feat, order, window):
        """
    Calculate delta of feats.
    :param feat: a float tensor of size (num_frames, dim_feat).
    :param order: an int.
    :param window: an int.
    :return: A tensor with shape (num_frames, dim_feats, order + 1),
        containing delta of features of every frame in speech.
    """

        p = self.config
        with tf.name_scope('delta_delta'):
            delta_delta = py_x_ops.delta_delta(feat, order, window)

        n_frame, n_feats = feat.get_shape().as_list()
        delta_delta = tf.reshape(delta_delta, (n_frame, n_feats, order + 1))

        return delta_delta
Code example #26
def splice_layer(x, name, context):
  '''
  Splice a tensor along the last dimension with context.
  e.g.:
  t = [[[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]]]
  splice_tensor(t, [0, 1]) =
      [[[1, 2, 3, 4, 5, 6],
        [4, 5, 6, 7, 8, 9],
        [7, 8, 9, 7, 8, 9]]]

  Args:
    x: a tf.Tensor with shape (B, T, D) a.k.a. (N, H, W)
    context: a list of context offsets

  Returns:
    spliced tensor with shape (..., D * len(context))
  '''
  with tf.variable_scope(name):
    input_shape = tf.shape(x)
    B, T = input_shape[0], input_shape[1]
    context_len = len(context)
    array = tf.TensorArray(x.dtype, size=context_len)
    for idx, offset in enumerate(context):
      begin = offset
      end = T + offset
      if begin < 0:
        # Negative offset: shift right, left-padding by replicating the first frame.
        begin = 0
        sliced = x[:, begin:end, :]
        tiled = tf.tile(x[:, 0:1, :], [1, abs(offset), 1])
        final = tf.concat((tiled, sliced), axis=1)
      else:
        # Non-negative offset: shift left, right-padding by replicating the last frame.
        end = T
        sliced = x[:, begin:end, :]
        tiled = tf.tile(x[:, -1:, :], [1, abs(offset), 1])
        final = tf.concat((sliced, tiled), axis=1)
      array = array.write(idx, final)
    spliced = array.stack()
    spliced = tf.transpose(spliced, (1, 2, 0, 3))
    spliced = tf.reshape(spliced, (B, T, -1))
  return spliced
Code example #27
def conv_pool(embedded_chars_expanded, filter_sizes, embedding_size,
              num_filters, sequence_length):
    """
    text conv and max pooling to get one-dimension vector to representation of text
    :param filter_sizes:
    :return:
    """
    pooled_outputs = []
    for _, filter_size in enumerate(filter_sizes):
        with tf.variable_scope("conv-maxpool-%s" % filter_size):
            # Convolution Layer
            filter_shape = [filter_size, embedding_size, 1, num_filters]
            W = tf.get_variable(name='W',
                                initializer=tf.truncated_normal(filter_shape,
                                                                stddev=0.1))
            b = tf.get_variable(name='b',
                                initializer=tf.constant(0.1,
                                                        shape=[num_filters]))
            conv = tf.nn.conv2d(embedded_chars_expanded,
                                W,
                                strides=[1, 1, 1, 1],
                                padding="VALID",
                                name="conv")
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            # Maxpooling over the outputs
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, sequence_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="pool")
            pooled_outputs.append(pooled)
    # Combine all the pooled features
    num_filters_total = num_filters * len(filter_sizes)

    h_pool = tf.concat(pooled_outputs, 3)

    h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])
    return h_pool_flat
Code example #28
def compute_batch_indices(batch_size, beam_size):
    """Computes the i'th coordinate that contains the batch index for gathers."""
    batch_pos = tf.range(batch_size * beam_size) // beam_size
    batch_pos = tf.reshape(batch_pos, [batch_size, beam_size])
    return batch_pos
Code example #29
def _unmerge_beam_dim(tensor, batch_size, beam_size):
    """Reshapes first dimension back to [batch_size, beam_size]."""
    shape = shape_list(tensor)
    new_shape = [batch_size] + [beam_size] + shape[1:]
    return tf.reshape(tensor, new_shape)
Code example #30
File: tf_metrics.py Project: lizhanyang505/delta-1
def confusion_matrix(logits, labels, num_class):
    ''' confusion matrix candies '''
    return tf.confusion_matrix(labels=tf.reshape(labels, [-1]),
                               predictions=tf.reshape(tf.argmax(logits, -1),
                                                      [-1]),
                               num_classes=num_class)