def _dpool_index(one_length_left, one_length_right, fixed_length_left,
                 fixed_length_right):
  """Build a [fixed_length_left, fixed_length_right, 2] index grid that maps
  a variable-sized match matrix onto a fixed-size one for dynamic pooling."""
  logging.info("fixed_length_left: {}".format(fixed_length_left))
  logging.info("fixed_length_right: {}".format(fixed_length_right))
  if one_length_left == 0:
    stride_left = fixed_length_left
  else:
    stride_left = 1.0 * fixed_length_left / tf.cast(
        one_length_left, dtype=tf.float32)
  if one_length_right == 0:
    stride_right = fixed_length_right
  else:
    stride_right = 1.0 * fixed_length_right / tf.cast(
        one_length_right, dtype=tf.float32)
  one_idx_left = [
      tf.cast(i / stride_left, dtype=tf.int32)
      for i in range(fixed_length_left)
  ]
  one_idx_right = [
      tf.cast(i / stride_right, dtype=tf.int32)
      for i in range(fixed_length_right)
  ]
  mesh1, mesh2 = tf.meshgrid(one_idx_left, one_idx_right)
  index_one = tf.transpose(tf.stack([mesh1, mesh2]), (2, 1, 0))
  return index_one

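# Usage sketch (an addition, not part of the original file): remap one
# variable-length similarity matrix onto a fixed 4x4 grid with the indices
# above. `match_matrix_one` is a hypothetical [7, 9] match matrix for a
# (7-word, 9-word) text pair.
import tensorflow as tf

match_matrix_one = tf.random.normal([7, 9])
index = _dpool_index(one_length_left=7, one_length_right=9,
                     fixed_length_left=4, fixed_length_right=4)
# index: [4, 4, 2]; entry (a, b) holds a (row, col) pair into the original.
pooled = tf.gather_nd(match_matrix_one, index)  # -> [4, 4]
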
def call(self, inputs, training=None, mask=None):
  batch_size = tf.shape(inputs)[0]
  # Tile the projection weights so they can be applied batch-wise.
  W_3d = tf.tile(tf.expand_dims(self.W, axis=0),
                 tf.stack([batch_size, 1, 1]))
  # [batch_size, steps, features]
  input_projection = tf.matmul(inputs, W_3d)
  if self.use_bias:
    input_projection += self.b
  input_projection = tf.tanh(input_projection)
  # Similarity of every step to the learned context vector.
  # [batch_size, steps, 1]
  similarities = tf.reduce_sum(
      tf.multiply(input_projection, self.attention_context_vector),
      axis=2,
      keepdims=True)
  # [batch_size, steps, 1]
  if mask is not None:
    attention_weights = masked_softmax(similarities, mask, axis=1)
  else:
    attention_weights = tf.nn.softmax(similarities, axis=1)
  # [batch_size, features]
  attention_output = tf.reduce_sum(
      tf.multiply(inputs, attention_weights), axis=1)
  return attention_output

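# Standalone sketch (an addition) of the same attention pooling with plain
# ops; the shapes and initializers are made up for illustration. Using
# tf.tensordot avoids tiling the weight matrix per batch element.
import tensorflow as tf

steps, features, attn_size = 10, 64, 64
inputs = tf.random.normal([8, steps, features])
W = tf.random.normal([features, attn_size])
b = tf.zeros([attn_size])
context = tf.random.normal([attn_size])

proj = tf.tanh(tf.tensordot(inputs, W, axes=1) + b)  # [8, steps, attn_size]
scores = tf.reduce_sum(proj * context, axis=2, keepdims=True)  # [8, steps, 1]
weights = tf.nn.softmax(scores, axis=1)
pooled = tf.reduce_sum(inputs * weights, axis=1)  # [8, features]
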
def compute_topk_scores_and_seq(sequences,
                                scores,
                                scores_to_gather,
                                flags,
                                beam_size,
                                batch_size,
                                prefix="default",
                                states_to_gather=None):
  """Given sequences and scores, will gather the top k=beam_size sequences."""
  _, topk_indexes = tf.nn.top_k(scores, k=beam_size)
  batch_pos = compute_batch_indices(batch_size, beam_size)
  top_coordinates = tf.stack([batch_pos, topk_indexes], axis=2)

  def gather(tensor, name):
    return tf.gather_nd(tensor, top_coordinates, name=(prefix + name))

  topk_seq = gather(sequences, "_topk_seq")
  topk_flags = gather(flags, "_topk_flags")
  topk_gathered_scores = gather(scores_to_gather, "_topk_scores")
  if states_to_gather:
    topk_gathered_states = nest.map_structure(
        lambda state: gather(state, "_topk_states"), states_to_gather)
  else:
    topk_gathered_states = states_to_gather
  return topk_seq, topk_gathered_scores, topk_flags, topk_gathered_states

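# Reference sketch (an addition): `compute_batch_indices` is defined
# elsewhere in this module; a minimal version consistent with its use here
# (it mirrors the tensor2tensor helper of the same name) would be:
import tensorflow as tf

def compute_batch_indices(batch_size, beam_size):
  """Returns a [batch_size, beam_size] tensor of row indices, e.g.
  [[0, 0, ...], [1, 1, ...], ...], used as the batch coordinate in gathers."""
  batch_pos = tf.range(batch_size * beam_size) // beam_size
  return tf.reshape(batch_pos, [batch_size, beam_size])
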
def split_one_doc_to_true_len_sens(doc_t, split_token, padding_token,
                                   max_doc_len, max_sen_len):
  """
  Split a document into sentences with true sentence lengths.
  doc_t: [doc_word_len]
  out_t: [max_doc_len, max_sen_len]
  """
  if len(doc_t.get_shape()) == 1:
    split_token_index = tf.squeeze(
        tf.where(tf.equal(doc_t, split_token)), axis=1)
    split_token_index.set_shape([None])
    split_len_part_1 = split_token_index[:1] + 1
    split_len_part_2 = split_token_index[1:] - split_token_index[:-1]
    split_lens = tf.concat([split_len_part_1, split_len_part_2], axis=0)
    split_lens = cut_or_padding(
        split_lens, max_doc_len, padding_token=padding_token)
    new_doc_len = tf.reduce_sum(split_lens)
    split_sentences = tf.split(doc_t[:new_doc_len], split_lens)
    split_sentences = [cut_or_padding(s, max_sen_len) for s in split_sentences]
    out_t = tf.stack(split_sentences)
    # Replace the split tokens left in the output with padding.
    padding_tokens = tf.multiply(
        tf.ones_like(out_t, dtype=tf.int32), padding_token)
    out_t = tf.where(tf.equal(out_t, split_token), padding_tokens, out_t)
    return out_t

  raise ValueError("doc_t should be a tensor with rank 1.")

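# Usage sketch (an addition): the token ids below are made up. The document
# "a b <s> c d e <s>" uses <s>=1 as the split token; each computed sentence
# length includes its trailing split token, which the function then replaces
# with padding.
import tensorflow as tf

doc = tf.constant([5, 6, 1, 7, 8, 9, 1])
out = split_one_doc_to_true_len_sens(
    doc, split_token=1, padding_token=0, max_doc_len=3, max_sen_len=5)
# out: [3, 5] -- [[5, 6, 0, 0, 0], [7, 8, 9, 0, 0], [0, 0, 0, 0, 0]]
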
def grow_topk(i, alive_seq, alive_log_probs, states):
  """Inner beam search loop."""
  # (batch_size * beam_size, decoded_length)
  flat_ids = tf.reshape(alive_seq, [batch_size * beam_size, -1])
  if states:
    flat_states = nest.map_structure(_merge_beam_dim, states)
    flat_logits, flat_states = symbols_to_logits_fn(flat_ids, i, flat_states)
    states = nest.map_structure(
        lambda t: _unmerge_beam_dim(t, batch_size, beam_size), flat_states)
  else:
    flat_logits = symbols_to_logits_fn(flat_ids)

  logits = tf.reshape(flat_logits, [batch_size, beam_size, -1])
  candidate_log_probs = log_prob_from_logits(logits)
  log_probs = candidate_log_probs + tf.expand_dims(alive_log_probs, axis=2)

  length_penalty = tf.pow(((5. + tf.to_float(i + 1)) / 6.), alpha)
  curr_scores = log_probs / length_penalty
  flat_curr_scores = tf.reshape(curr_scores, [-1, beam_size * vocab_size])

  topk_scores, topk_ids = tf.nn.top_k(flat_curr_scores, k=beam_size * 2)
  # Recover the log probabilities, since scores are length-normalized.
  topk_log_probs = topk_scores * length_penalty

  # Work out which beam each candidate came from.
  topk_beam_index = topk_ids // vocab_size
  topk_ids %= vocab_size  # Unflatten the ids

  batch_pos = compute_batch_indices(batch_size, beam_size * 2)
  topk_coordinates = tf.stack([batch_pos, topk_beam_index], axis=2)
  topk_seq = tf.gather_nd(alive_seq, topk_coordinates)
  if states:
    states = nest.map_structure(
        lambda state: tf.gather_nd(state, topk_coordinates), states)
  topk_seq = tf.concat([topk_seq, tf.expand_dims(topk_ids, axis=2)], axis=2)

  topk_finished = tf.equal(topk_ids, eos_id)
  return topk_seq, topk_log_probs, topk_scores, topk_finished, states

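# Note (an addition): the division above is the GNMT length penalty,
# lp(i) = ((5 + i + 1) / 6) ** alpha, and 2 * beam_size candidates are kept
# so that beam_size live hypotheses remain even if beam_size of them end in
# EOS. A quick numeric check of the penalty, with a typical alpha:
alpha_example = 0.6
lp = ((5. + 10.) / 6.)**alpha_example  # step i = 9 -> ~1.73
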
def call(self, inputs: list, **kwargs) -> typing.Any:
  """
  The computation logic of MatchingLayer.

  :param inputs: two input tensors.
  """
  x1 = inputs[0]
  x2 = inputs[1]
  if self._matching_type == 'dot':
    if self._normalize:
      x1 = tf.math.l2_normalize(x1, axis=2)
      x2 = tf.math.l2_normalize(x2, axis=2)
    return tf.expand_dims(tf.einsum('abd,acd->abc', x1, x2), 3)
  else:
    if self._matching_type == 'mul':

      def func(x, y):
        return x * y
    elif self._matching_type == 'plus':

      def func(x, y):
        return x + y
    elif self._matching_type == 'minus':

      def func(x, y):
        return x - y
    elif self._matching_type == 'concat':

      def func(x, y):
        return tf.concat([x, y], axis=3)
    else:
      raise ValueError(f"Invalid matching type: "
                       f"{self._matching_type} received. "
                       f"Must be one of `dot`, `mul`, `plus`, "
                       f"`minus` and `concat`.")
    x1_exp = tf.stack([x1] * self._shape2[1], 2)
    x2_exp = tf.stack([x2] * self._shape1[1], 1)
    return func(x1_exp, x2_exp)

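# Usage sketch (an addition): constructor arguments are assumed from the
# attribute names used above. With 'dot', two [batch, len, dim] inputs give
# a [batch, len1, len2, 1] similarity cube; 'concat' instead tiles both
# inputs and yields [batch, len1, len2, 2 * dim].
import tensorflow as tf

x1 = tf.random.normal([4, 8, 32])
x2 = tf.random.normal([4, 10, 32])
match = MatchingLayer(matching_type='dot', normalize=True)  # hypothetical ctor
sim = match([x1, x2])  # -> [4, 8, 10, 1]
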
def test_extract_feature(self):
  ''' test extract feature '''
  with self.cached_session(use_gpu=False, force_gpu=False):
    wavfile = tf.constant(self.wavpath)
    # read_wav also returns the sample rate (see the batched test below);
    # only the audio samples are needed here.
    audio, sample_rate = tffeat.read_wav(wavfile, self.params)
    del sample_rate

    # slice and tile to batch
    audio = tf.stack([audio[:1000]] * 32)

    feature = tffeat.extract_feature(audio, self.params)

    self.assertEqual(audio.eval().shape, (32, 1000))
    self.assertAllEqual(audio.eval()[0], self.audio_true[:1000])
    self.assertEqual(feature.eval().shape, (32, 13, 40, 3))

def test_batch_extract_feature(self):
  ''' test batched feature extraction '''
  #pylint: disable=invalid-name
  hp = tffeat.speech_params(
      sr=self.sr_true,
      bins=40,
      cmvn=False,
      audio_desired_samples=1000,
      add_delta_deltas=True)

  batch_size = 2
  with self.cached_session(use_gpu=False, force_gpu=False):
    wavfile = tf.constant(self.wavpath)

    # read wav
    audio, sample_rate = tffeat.read_wav(wavfile, hp)
    del sample_rate
    audio = tf.stack([audio] * batch_size)

    # fbank with delta delta and cmvn
    feature = tffeat.batch_extract_feature(audio, hp)

    self.assertEqual(feature.eval().shape, (batch_size, 11, 40, 3))

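# Note (an addition): the asserted feature layout is
# [batch, frames, bins, channels]; with add_delta_deltas=True the three
# channels are static fbank, delta, and delta-delta. The exact frame count
# (11 here, 13 in the previous test) follows from the sample rate and
# framing settings carried in the params/hp objects.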