def model(self, feats, labels):
    ''' Build the model: ResNet trunk, average pooling, dense embedding, logits.

    params:
      feats: input features, passed directly to `self.resnet`.
      labels: labels, forwarded to `self.logits_layer`.
    return:
      dict with 'logits' (output of `self.logits_layer`) and 'embeddings'
      (the batch-normalized dense projection).
    '''
    resnet_out = self.resnet(feats)

    with tf.variable_scope("avg_pooling"):
        # Batch and time are read dynamically; the two trailing dims must be
        # statically known so they can be merged into one feature axis.
        batch_t = tf.shape(resnet_out)[0]
        time_t = tf.shape(resnet_out)[1]
        feat, channel = resnet_out.shape.as_list()[2:]
        flattened = tf.reshape(resnet_out, [batch_t, time_t, feat * channel])
        pooled = self.pooling_layer(flattened, pooling_type='average')

    with tf.variable_scope("output_layer"):
        in_dim = pooled.shape.as_list()[-1]
        out_dim = self.params().embedding_size
        projected = common_layers.linear(
            pooled, 'dense-matmul', [in_dim, out_dim], has_bias=True)
        embedding = tf.layers.batch_normalization(
            projected,
            axis=-1,
            momentum=0.99,
            training=self.train,
            name='dense-bn')

    # NOTE(review): assumes the logits layer is built outside the
    # "output_layer" variable scope -- confirm against the checkpoint names.
    logits = self.logits_layer(embedding, labels)
    return {'logits': logits, 'embeddings': embedding}
def cross_entropy(logits,
                  labels,
                  input_length=None,
                  label_length=None,
                  smoothing=0.0,
                  reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS):
    '''
    Softmax cross entropy for classification and sequence classification.

    params:
      logits: unnormalized class scores; the last axis is the class axis.
      labels: class ids (rank one less than `logits`, converted to one-hot
        here) or labels that are used as they are.
      input_length: unused; it is deleted immediately.
      label_length: for sequence tasks, the target sequence length,
        e.g. `a b c </s>` has length 4. When given, it is converted to a mask
        over axis 1 of `logits` and used as the loss weights.
      smoothing: label smoothing factor passed to the loss.
      reduction: reduction mode passed to the loss.
    return:
      the loss tensor produced by `tf.losses.softmax_cross_entropy`.
    '''
    del input_length

    def _ids_to_onehot():
        return tf.one_hot(labels, tf.shape(logits)[-1], dtype=tf.int32)

    def _labels_unchanged():
        return labels

    # The decision is made at graph run time from the rank difference.
    labels_are_ids = tf.equal(tf.rank(logits) - tf.rank(labels), 1)
    onehot_labels = tf.cond(
        pred=labels_are_ids,
        true_fn=_ids_to_onehot,
        false_fn=_labels_unchanged)

    if label_length is None:
        weights = 1.0
    else:
        max_len = tf.shape(logits)[1]
        weights = utils.len_to_mask(label_length, max_len)

    return tf.losses.softmax_cross_entropy(
        onehot_labels=onehot_labels,
        logits=logits,
        weights=weights,
        label_smoothing=smoothing,
        reduction=reduction)
def test_linear(self):
    '''test linear: check the output shape for 2D, 3D and 4D inputs'''
    weight_shape = [5, 4]
    # (input shape, layer name, expected output shape); the 4D case is
    # laid out as [B, C, H, W].
    cases = [
        ([4, 5], 'test_linear0', [4, 4]),
        ([2, 4, 5], 'test_linear1', [2, 4, 4]),
        ([2, 3, 4, 5], 'test_linear2', [2, 3, 4, 4]),
    ]
    for input_shape, layer_name, expected_shape in cases:
        inputs = tf.random_uniform(
            shape=input_shape, dtype=tf.float32, maxval=1.0)
        output = cl.linear(inputs, layer_name, weight_shape)
        self.assertAllEqual(tf.shape(output), expected_shape)
def test_tdnn(self):
    '''test tdnn: check the output shape of two tdnn methods'''
    # A 3D Tensor [batch, in_width, in_channels]
    inputs = tf.random_uniform(shape=[2, 5, 3], dtype=tf.float32, maxval=1.0)
    in_dim = inputs.get_shape().as_list()[2]
    out_dim = 4
    expected_shape = [2, 5, 4]

    # 'splice_layer' is given an explicit list of context offsets.
    spliced = cl.tdnn(
        inputs,
        'test_tdnn0',
        in_dim, [-2, -1, 0, 1, 2],
        out_dim,
        method='splice_layer')
    self.assertAllEqual(tf.shape(spliced), expected_shape)

    # 'conv1d' is given a single integer context. The 'splice_op' method
    # ('test_tdnn1') is not exercised by this test.
    convolved = cl.tdnn(
        inputs, 'test_tdnn2', in_dim, 2, out_dim, method='conv1d')
    self.assertAllEqual(tf.shape(convolved), expected_shape)
def test_attention(self):
    '''test attention: check the shapes of the output and of the alphas'''
    batch, steps, dim = 2, 100, 512
    attention_size = 256
    # A 3D tensor [B, T, D]
    inputs = tf.random_uniform(
        shape=[batch, steps, dim], dtype=tf.float32, maxval=1.0)

    output, alpha = cl.attention(inputs, attention_size, return_alphas=True)

    self.assertAllEqual(tf.shape(output), [batch, dim])
    self.assertAllEqual(tf.shape(alpha), [batch, steps, 1])
def cut_or_padding(origin_t, new_length, padding_token=0):
    """
    Force the last axis of a rank-1 or rank-2 tensor to `new_length`.

    If the tensor is too long it is cut; otherwise it is padded at the tail
    with `padding_token`.

    origin_t: [batch_size, time_steps_1] or [time_steps_1]
    new_t: [batch_size, time_steps_2] or [time_steps_2]

    Raises ValueError when the static rank of `origin_t` is not 1 or 2.
    """
    rank = len(origin_t.get_shape())
    if rank not in (1, 2):
        raise ValueError("origin_t should be a tensor with rank 1 or 2.")
    is_vector = rank == 1
    cur_length = tf.shape(origin_t)[rank - 1]

    def _truncate():
        if is_vector:
            return origin_t[:new_length]
        return origin_t[:, :new_length]

    def _pad_tail():
        # Build a paddings matrix that is zero everywhere except the
        # "after" slot of the last axis.
        if is_vector:
            paddings_shape = tf.constant([1, 2])
            after_slot = tf.constant([[0, 1]])
        else:
            paddings_shape = tf.constant([2, 2])
            after_slot = tf.constant([[1, 1]])
        missing = [new_length - cur_length]
        paddings = tf.scatter_nd(after_slot, missing, paddings_shape)
        return tf.pad(
            origin_t, paddings, "CONSTANT", constant_values=padding_token)

    new_t = tf.cond(
        cur_length < new_length, true_fn=_pad_tail, false_fn=_truncate)

    # tf.cond loses the static length; restore it.
    if is_vector:
        new_t.set_shape([new_length])
    else:
        new_t.set_shape([origin_t.get_shape()[0], new_length])
    return new_t
def call(self, inputs, training=None, mask=None):
    """
    Pool a sequence into one vector per example using attention weights.

    Args:
      inputs: tensor multiplied by `self.W` after tiling `W` over the batch;
        per the inline shape notes it is [batch_size, steps, features].
      training: unused by this method.
      mask: optional mask; when given, `masked_softmax` is used over axis 1
        instead of a plain softmax.

    Returns:
      The attention-weighted sum of `inputs` over axis 1,
      [batch_size, features].
    """
    batch_size = tf.shape(inputs)[0]
    # Tile the weight matrix so tf.matmul sees one copy per example.
    W_3d = tf.tile(
        tf.expand_dims(self.W, axis=0), tf.stack([batch_size, 1, 1]))

    # [batch_size, steps, features]
    input_projection = tf.matmul(inputs, W_3d)
    if self.use_bias:
        input_projection += self.b
    input_projection = tf.tanh(input_projection)

    # [batch_size, steps, 1]
    # `keepdims` is the supported keyword; the old `keep_dims` alias is
    # deprecated in TF 1.x and rejected by TF 2.x.
    similaritys = tf.reduce_sum(
        tf.multiply(input_projection, self.attention_context_vector),
        axis=2,
        keepdims=True)

    # [batch_size, steps, 1]
    if mask is not None:
        attention_weights = masked_softmax(similaritys, mask, axis=1)
    else:
        attention_weights = tf.nn.softmax(similaritys, axis=1)

    # [batch_size, features]
    attention_output = tf.reduce_sum(
        tf.multiply(inputs, attention_weights), axis=1)
    return attention_output
def test_jieba_cut_op_no_file(self):
    ''' test jieba: segment single sentences and a batch of two sentences '''
    with tf.Graph().as_default():
        sentence_in = tf.placeholder(
            dtype=tf.string, shape=[None], name="sentence_in")
        sentence_out = self.build_op_no_file(sentence_in)
        shape_op = tf.shape(sentence_out)

        with self.cached_session(use_gpu=False, force_gpu=False) as sess:
            # One sentence at a time: (raw input, expected segmentation).
            single_cases = [
                ("我爱北京天安门", "我 爱 北京 天安门"),
                ("吉林省长春药店", "吉林省 长春 药店"),
            ]
            for raw, expected in single_cases:
                single_res = test_one(sess, sentence_out, {sentence_in: [raw]})
                self.assertEqual(expected, single_res[0].decode("utf-8"))

            # A batch of two sentences; the output shape is fetched as well.
            batch_res, shape_res = test_one(
                sess, [sentence_out, shape_op],
                {sentence_in: ["吉林省长春药店", "南京市长江大桥"]})
            decoded = [one_sen.decode("utf-8") for one_sen in batch_res]
            self.assertEqual("吉林省 长春 药店\n南京市 长江大桥", "\n".join(decoded))
            logging.info(f"shape: {shape_res}")
            self.assertAllEqual(shape_res, [2])
def call(self, audio_data, sample_rate=None):
    """
    Calculate mfcc features of audio data.
    :param audio_data: the audio signal from which to compute spectrum.
                       Should be an (1, N) tensor.
    :param sample_rate: the sample rate of the signal we are working with.
                        When None, the configured `sample_rate` is used. The
                        value is asserted at run time to equal the
                        configured sample rate.
    :return: A float tensor of size (num_channels, num_frames, num_frequencies)
             containing mfcc features of every frame in speech.
    """
    p = self.config
    with tf.name_scope('mfcc'):
        # Identity check: `== None` on a tensor argument would be an
        # equality comparison rather than a test for "not provided".
        if sample_rate is None:
            sample_rate = tf.constant(p.sample_rate, dtype=tf.int32)

        assert_op = tf.assert_equal(
            tf.constant(p.sample_rate), tf.cast(sample_rate, dtype=tf.int32))
        # Everything below only runs once the sample-rate check has passed.
        with tf.control_dependencies([assert_op]):
            fbank_feats = self.fbank(audio_data, sample_rate)
            sample_rate = tf.cast(sample_rate, dtype=tf.int32)

            # Add a leading axis of size 1 in front of the first two dims.
            shape = tf.shape(fbank_feats)
            nframe = shape[0]
            nfbank = shape[1]
            fbank_feats = tf.reshape(fbank_feats, (1, nframe, nfbank))

            framepow_feats = self.framepow(audio_data, sample_rate)
            mfcc = py_x_ops.mfcc(
                fbank_feats,
                framepow_feats,
                sample_rate,
                use_energy=p.use_energy,
                cepstral_lifter=p.cepstral_lifter,
                coefficient_count=p.coefficient_count)
            return mfcc
def scaled_dot_product_attention(q, k, v, mask):
    """
    Scaled dot-product attention.

    Args:
      q: (batch_size, seq_len_q, hidden_size)
      k: (batch_size, seq_len_k, hidden_size)
      v: (batch_size, seq_len_v, hidden_size)
      mask: (batch_size, seq_len_q, seq_len_k) or None. Entries are scaled
        by -1e9 and added to the logits, so non-zero positions are
        suppressed by the softmax.

    Returns:
      output: (batch_size, seq_len_q, depth_v)
      attention_weights: softmax over the last axis of the scaled logits,
        (batch_size, seq_len_q, seq_len_k)
    """
    # (batch_size, seq_len_q, seq_len_k)
    raw_scores = tf.matmul(q, k, transpose_b=True)

    # Scale by the square root of the key depth.
    key_depth = tf.cast(tf.shape(k)[-1], tf.float32)
    logits = raw_scores / tf.math.sqrt(key_depth)

    if mask is not None:
        logits = logits + (mask * -1e9)

    # (batch_size, seq_len_q, seq_len_k)
    attention_weights = tf.nn.softmax(logits, axis=-1)

    # Weighted sum of the values: (batch_size, seq_len_q, depth_v)
    output = tf.matmul(attention_weights, v)
    return output, attention_weights
def accuracy(logits, labels):
    ''' Fraction of positions where argmax(logits) equals the label.
    params:
      logits: [B, ..., D]
      labels: [B, ...]
    return:
      accuracy tensor (mean of the 0/1 matches as float32)
    '''
    with tf.name_scope('accuracy'):
        # Run-time checks: logits carry exactly one extra (class) axis and
        # agree with labels on all leading dims.
        shape_checks = [
            tf.assert_equal(tf.rank(logits), tf.rank(labels) + 1),
            tf.assert_equal(tf.shape(logits)[:-1], tf.shape(labels)),
        ]
        with tf.control_dependencies(shape_checks):
            predicted = tf.argmax(logits, axis=-1, output_type=tf.int64)
            expected = tf.cast(labels, tf.int64)
            hits = tf.cast(tf.equal(predicted, expected), dtype=tf.float32)
            return tf.reduce_mean(hits)
def tdnn_block(self, inputs):
    ''' TDNN layers.

    Stacks one tdnn layer per entry of `netconf['tdnn_contexts']`, each
    followed by ReLU and, when enabled in `netconf`, batch normalization and
    dropout.

    params:
      inputs: 4-D tensor; dims 2 and 3 are read from the static shape.
        presumably NHWC, per the inline note below -- confirm with caller.
    return:
      (x, downsample_input_len): the output of the last layer and
      `self.input_len`, which this block returns unchanged.
    '''
    if 'tdnn_method' in self.netconf:
        tdnn_method = self.netconf['tdnn_method']
    else:
        # Runs faster, support discrete context, for now.
        tdnn_method = 'splice_layer'

    tdnn_contexts = self.netconf['tdnn_contexts']
    logging.info("tdnn_contexts : {}".format(tdnn_contexts))
    tdnn_dims = self.netconf['tdnn_dims']
    logging.info("tdnn_dims : {}".format(tdnn_dims))

    layer_num = len(tdnn_contexts)
    assert layer_num == len(tdnn_dims)

    # channels[i] is the input width of layer i, channels[i + 1] its output.
    channels = [self.input_channels] + tdnn_dims
    logging.info("tdnn_channels : {}".format(channels))

    input_h_t = tf.shape(inputs)[1]
    input_w = inputs.shape[2]
    input_c = inputs.shape[3]
    if tdnn_method == 'conv1d':
        # NHWC -> NW'C, W' = H * W
        inputs = tf.reshape(inputs, [-1, input_h_t * input_w, input_c])
        last_w = channels[0]
    else:
        inputs = tf.reshape(inputs, [-1, input_h_t, input_w * input_c])
        last_w = input_w * input_c

    # No layer in this block changes the length, so it is passed through
    # as is (the original re-assigned it to itself on every iteration).
    downsample_input_len = self.input_len

    with tf.variable_scope('tdnn'):
        x = tf.identity(inputs)
        for index in range(layer_num):
            unit_name = 'unit-' + str(index + 1)
            with tf.variable_scope(unit_name):
                tdnn_name = 'tdnn-' + str(index + 1)
                x = common_layers.tdnn(
                    x,
                    tdnn_name,
                    last_w,
                    tdnn_contexts[index],
                    channels[index + 1],
                    has_bias=True,
                    method=tdnn_method)
                last_w = channels[index + 1]
                x = tf.nn.relu(x)
                if self.netconf['use_bn']:
                    bn_name = 'bn' + str(index + 1)
                    x = tf.layers.batch_normalization(
                        x,
                        axis=-1,
                        momentum=0.9,
                        training=self.train,
                        name=bn_name)
                if self.netconf['use_dropout']:
                    x = tf.layers.dropout(
                        x, self.netconf['dropout_rate'], training=self.train)

    return x, downsample_input_len
def test_embedding_look_up(self):
    '''test embedding look up: check the output shape for three token ids'''
    token_ids = [0, 1, 2]
    vocab_size = 3
    embedding_size = 512

    output = cl.embedding_look_up(token_ids, vocab_size, embedding_size)

    expected_shape = [len(token_ids), embedding_size, 1]
    self.assertAllEqual(tf.shape(output), expected_shape)
def shape_list(tensor):
    """Return list of dims, statically where possible.

    When the rank itself is unknown, the dynamic shape tensor is returned
    instead of a list. Otherwise each statically known dim is returned as is
    and each unknown dim is taken from the dynamic shape.
    """
    tensor = tf.convert_to_tensor(tensor)
    static_shape = tensor.get_shape()
    if static_shape.dims is None:
        return tf.shape(tensor)

    static_dims = static_shape.as_list()
    dynamic_dims = tf.shape(tensor)
    return [
        dynamic_dims[axis] if size is None else size
        for axis, size in enumerate(static_dims)
    ]
def splice(feat, left_context, right_context):
    '''
    splice frame with context
      param: feat, tf.float32, [batch, time, feat]
      param: left_context / right_context, number of frames taken from each
        side; both are asserted to be >= 0
      return: feat, tf.float32, [batch, time, feat*(left_context + 1 + right_context)]
      reference:
        https://github.com/kaldi-asr/kaldi/src/feat/feature-functions.cc#L205:6
    '''

    def _keep_going(step, num_steps, padded, unused_left, unused_right,
                    unused_tas):
        # Loop condition: one iteration per time step.
        del unused_tas
        del unused_left
        return step < num_steps

    def _write_window(step, num_steps, padded, left, right, tas):
        # Flatten the window that starts at `step` in the padded features
        # into one row per example and store it at index `step`.
        dims = tf.shape(padded)
        batch, _, depth = dims[0], dims[1], dims[2]
        flat_size = (1 + left + right) * depth
        window = padded[:, step:step + left + 1 + right, :]
        window = tf.reshape(window, [batch, flat_size])
        return (step + 1, num_steps, padded, left, right,
                tas.write(step, window))

    nonneg_checks = [
        tf.assert_greater_equal(left_context, 0),
        tf.assert_greater_equal(right_context, 0)
    ]
    with tf.control_dependencies(nonneg_checks):
        num_frames = tf.shape(feat)[1]
        frame_tas = _new_tensor_array(
            'splice_feat_ta', num_frames, dtype=tf.float32)
        start = tf.constant(0, tf.int32)

        # Repeat the first/last frame so every step has a full window.
        head_pad = tf.tile(feat[:, 0:1, :], [1, left_context, 1])
        tail_pad = tf.tile(feat[:, -1:, :], [1, right_context, 1])
        padded = tf.concat([head_pad, feat], axis=1)
        padded = tf.concat([padded, tail_pad], axis=1)

        loop_vars = (start, num_frames, padded, left_context, right_context,
                     frame_tas)
        shape_invariants = tf.nest.map_structure(
            lambda t: tf.TensorShape(None), loop_vars)
        loop_result = tf.while_loop(
            _keep_going,
            _write_window,
            loop_vars=loop_vars,
            shape_invariants=shape_invariants,
            parallel_iterations=10,
            swap_memory=False)

        # Only the tensor array is needed from the loop outputs. Stacking
        # gives time-major data; transpose it to batch-major.
        stacked = loop_result[-1].stack()
        batch_spliced_feats = tf.transpose(stacked, [1, 0, 2])
    return batch_spliced_feats
def shape_list(x):
    """Return list of dims, statically where possible.

    For a tensor of unknown rank the dynamic shape tensor is returned.
    Otherwise the result is a Python list holding the static size of each
    dim, with unknown sizes replaced by slices of the dynamic shape.
    """
    x = tf.convert_to_tensor(x)

    # If unknown rank, return dynamic shape
    if x.get_shape().dims is None:
        return tf.shape(x)

    dims = x.get_shape().as_list()
    runtime_shape = tf.shape(x)
    for position, known in enumerate(dims):
        if known is None:
            dims[position] = runtime_shape[position]
    return dims
def test_chinese_word(self):
    '''test the text cls task on Chinese word data, offline and online'''
    config = utils.load_config(self.config_file)
    data_config = config["data"]
    task_config = data_config["task"]
    class_num = task_config["classes"]["num_classes"]

    # Configure the task for Chinese word-level input.
    task_config["language"] = "chinese"
    task_config["split_by_space"] = False
    task_config["use_word"] = True

    # Point every split at the mock data set.
    data_config["train"]["paths"] = [
        "egs/mock_text_cls_data/text_cls/v1/data/train.chinese_word.txt"
    ]
    data_config["eval"]["paths"] = [
        "egs/mock_text_cls_data/text_cls/v1/data/eval.chinese_word.txt"
    ]
    data_config["infer"]["paths"] = [
        "egs/mock_text_cls_data/text_cls/v1/data/test.chinese_word.txt"
    ]
    task_config["text_vocab"] = (
        "egs/mock_text_cls_data/text_cls/v1/data/text_vocab.chinese_word.txt")
    task_config["need_shuffle"] = False
    config["model"]["split_token"] = ""
    task_config["preparer"]["reuse"] = False

    task = TextClsTask(config, utils.TRAIN)

    # test offline data
    data = task.dataset()
    self.assertTrue("input_x_dict" in data and
                    "input_x" in data["input_x_dict"])
    self.assertTrue("input_y_dict" in data and
                    "input_y" in data["input_y_dict"])
    with self.cached_session(use_gpu=False, force_gpu=False) as sess:
        sess.run(data["iterator"].initializer)
        batch_x, batch_y = sess.run(
            [data["input_x_dict"]["input_x"], data["input_y_dict"]["input_y"]])
        logging.debug(batch_x[0])
        logging.debug(batch_y[0])
        self.assertAllEqual(batch_x[0][:5], [2, 0, 0, 0, 0])
        self.assertEqual(np.shape(batch_y), (32, class_num))

    # test online data
    export_inputs = task.export_inputs()
    self.assertTrue("export_inputs" in export_inputs and
                    "input_sentence" in export_inputs["export_inputs"])
    input_sentence = export_inputs["export_inputs"]["input_sentence"]
    input_x = export_inputs["model_inputs"]["input_x"]
    shape_op = tf.shape(input_x)

    with self.cached_session(use_gpu=False, force_gpu=False) as sess:
        res, shape_res = sess.run([input_x, shape_op],
                                  feed_dict={input_sentence: ["我很愤怒"]})
        logging.debug(res[0])
        logging.debug(np.shape(res[0]))
        logging.debug(f"shape: {shape_res}")
        self.assertAllEqual(shape_res, [1, 1024])
        self.assertAllEqual(res[0][:5], [4, 5, 0, 0, 0])
def _loop_body(time, end_time, context, left_context, right_context,
               output_tas):
    """Write the flattened context window starting at `time` into `output_tas`.

    The window covers `left_context + 1 + right_context` steps of axis 1 of
    `context` and is reshaped to [B, window * D] before being written at
    index `time`. All loop variables are returned, with `time` advanced by
    one and the updated tensor array in the last slot.
    """
    dims = tf.shape(context)
    batch, _, depth = dims[0], dims[1], dims[2]
    flat_size = (1 + left_context + right_context) * depth

    window = context[:, time:time + left_context + 1 + right_context, :]
    window = tf.reshape(window, [batch, flat_size])
    updated_tas = output_tas.write(time, window)

    return (time + 1, end_time, context, left_context, right_context,
            updated_tas)
def test_maxpool(self):
    '''test maxpool: check the output shape and values on a 5x5 ramp'''
    # A 4D tensor holding 0..24
    inputs = tf.reshape(tf.range(25), shape=[1, 5, 5, 1])

    output = cl.max_pool(inputs, [3, 3], [1, 1])

    self.assertAllEqual(tf.shape(output), [1, 3, 3, 1])

    expected_rows = [
        [[12], [13], [14]],
        [[17], [18], [19]],
        [[22], [23], [24]],
    ]
    output_true = tf.constant([expected_rows])
    self.assertAllEqual(output, output_true)
def _reshape_mask(mask):
    """
    Repeat the mask once per attention head.

    Input shape: (Batch size, steps)
    Output shape: (Batch size * head num, steps)

    Returns None when `mask` is None. `self.head_num` is read from the
    enclosing scope.
    """
    if mask is None:
        return None

    steps = tf.shape(mask)[1]
    # (batch, 1, steps) -> (batch, head_num, steps) -> (batch * head_num, steps)
    per_head = tf.expand_dims(mask, axis=1)
    per_head = tf.tile(per_head, [1, self.head_num, 1])
    return tf.reshape(per_head, shape=(-1, steps))
def test_conv2d(self):
    '''test conv2d: check the output shape for a 3x3 filter with stride 1'''
    # A 4D Tensor
    inputs = tf.random_uniform(
        shape=[2, 5, 5, 3], dtype=tf.float32, maxval=1.0)
    in_channels = inputs.get_shape().as_list()[3]
    out_channels = 4

    output = cl.conv2d(inputs, 'test_conv2d', [3, 3], in_channels,
                       out_channels, [1, 1])

    self.assertAllEqual(tf.shape(output), [2, 5, 5, out_channels])
def linear_block(self, x):
    '''
    linear layer for dim reduction
    x: shape [batch, time, feat, channel]
    output: shape [b, t, f]

    When `netconf['linear_num']` is positive, feat and channel are merged
    and projected to `linear_num` units (optional dropout before, ReLU and
    optional batch normalization after). Otherwise the input is only
    reshaped to [batch, time, feat * channel].
    '''
    batch_t = tf.shape(x)[0]
    time_t = tf.shape(x)[1]
    feat, channel = x.shape.as_list()[2:]
    flat_dim = feat * channel
    linear_num = self.netconf['linear_num']

    if linear_num <= 0:
        logging.info('linear_num <= 0, only apply reshape.')
        return tf.reshape(x, [batch_t, time_t, flat_dim])

    with tf.variable_scope('linear'):
        # Fold batch and time together so the layer sees a 2-D input.
        x = tf.reshape(x, [batch_t * time_t, flat_dim])

        if self.netconf['use_dropout']:
            x = tf.layers.dropout(
                x, self.netconf['dropout_rate'], training=self.train)

        x = common_layers.linear(x, 'linear1', [flat_dim, linear_num])
        x = tf.nn.relu(x)

        if self.netconf['use_bn']:
            x = tf.layers.batch_normalization(
                x,
                axis=-1,
                momentum=0.9,
                training=self.train,
                name='bn_linear')

        x = tf.reshape(x, [batch_t, time_t, linear_num])
    return x
def test_conv_pool(self):
    '''test conv pool: check the output shape for two filter sizes'''
    # A 4D tensor [B, H, W, C]
    embedded_chars_expanded = tf.random_uniform(
        shape=[2, 7, 7, 1], dtype=tf.float32, maxval=1.0)
    settings = {
        'filter_sizes': [3, 5],
        'embedding_size': 3,
        'num_filters': 3,
        'sequence_length': 5,
    }

    output = cl.conv_pool(embedded_chars_expanded, settings['filter_sizes'],
                          settings['embedding_size'], settings['num_filters'],
                          settings['sequence_length'])

    self.assertAllEqual(tf.shape(output), [30, 6])
def call(self, inputs, training=None, mask=None):
    """
    The implementation of Multi-headed attention.

    Args:
      inputs: a (q, k, v) triple, unpacked in that order
        q: (batch_size, seq_len_q, hidden_size)
        k: (batch_size, seq_len_k, hidden_size)
        v: (batch_size, seq_len_v, hidden_size)
      training: unused by this method.
      mask: (batch_size, seq_len_q, seq_len_k), passed on to
        `self.scaled_dot_product_attention`.

    Returns:
      output: (batch_size, seq_len_q, hidden_size)
      attention_weights: (batch_size, num_heads, seq_len_q, seq_len_k)
    """
    query, key, value = inputs
    batch_size = tf.shape(query)[0]

    # Linear projections, each (batch_size, seq_len_*, hidden_size).
    query = self.wq(query)
    key = self.wk(key)
    value = self.wv(value)

    # Split into heads, each (batch_size, num_heads, seq_len_*, depth).
    query = self.split_heads(query, batch_size)
    key = self.split_heads(key, batch_size)
    value = self.split_heads(value, batch_size)

    # per_head.shape == (batch_size, num_heads, seq_len_q, depth)
    # attention_weights.shape == (batch_size, num_heads, seq_len_q, seq_len_k)
    per_head, attention_weights = self.scaled_dot_product_attention(
        query, key, value, mask)

    # (batch_size, seq_len_q, num_heads, depth)
    per_head = tf.transpose(per_head, perm=[0, 2, 1, 3])

    # Merge the heads back: (batch_size, seq_len_q, hidden_size)
    merged = tf.reshape(per_head, (batch_size, -1, self.hidden_size))

    # (batch_size, seq_len_q, hidden_size)
    output = self.dense(merged)
    return output, attention_weights
def delta_delta(feat, order=2):
    '''
    params:
      feat: a tensor of shape [nframe, nfbank] or [nframe, nfbank, 1]
      order: passed to `py_x_ops.delta_delta`; the result is reshaped to
        `order + 1` values per (frame, bank)
    return: [nframe, nfbank, order + 1], i.e. [nframe, nfbank, 3] by default
    '''

    def _drop_channel_axis():
        return feat[:, :, 0]

    def _already_2d():
        return feat

    # A rank-3 input is reduced to 2-D by keeping index 0 of the last axis.
    feat = tf.cond(
        tf.equal(tf.rank(feat), 3),
        true_fn=_drop_channel_axis,
        false_fn=_already_2d)

    dims = tf.shape(feat)
    nframe, nfbank = dims[0], dims[1]

    delta = py_x_ops.delta_delta(feat, order=order)
    return tf.reshape(delta, (nframe, nfbank, (order + 1)))
def batch_extract_feature(waveforms, params):
    ''' Extract features for every waveform of a batch with a tf.while_loop.
    waveforms: [batch, samples, audio_channels]
    params: passed unchanged to `extract_feature` for each example
    return: features [batch, nframes, feat_size, channels]
    '''

    def _loop_continue(time, inputs, unused_output_tas):
        # Loop condition: one iteration per example in the batch.
        del unused_output_tas
        batch = tf.shape(inputs)[0]
        return time < batch

    def _loop_body(time, inputs, output_tas):
        # Extract features for example `time` and store them at that index.
        feat = extract_feature(inputs[time, ...], params)
        new_output_tas = output_tas.write(time, feat)
        return (time + 1, inputs, new_output_tas)

    batch = tf.shape(waveforms)[0]
    output_tas = _new_tensor_array('batch_feat', batch, dtype=tf.float32)
    time = tf.constant(0, tf.int32)
    loop_vars = (time, waveforms, output_tas)

    parallel_iterations = 10
    # Leave every loop variable's shape unconstrained across iterations.
    shape_invariants = tf.nest.map_structure(lambda t: tf.TensorShape(None),
                                             loop_vars)

    (time, inputs, output_tas) = tf.while_loop(
        _loop_continue,
        _loop_body,
        loop_vars=loop_vars,
        shape_invariants=shape_invariants,
        parallel_iterations=parallel_iterations,
        swap_memory=False)
    del inputs

    batch_feats = output_tas.stack()
    return batch_feats
def splice_layer(x, name, context):
    '''
    Splice a tensor along the last dimension with context.
    e.g.:
    t = [[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]
    splice_tensor(t, [0, 1]) =
        [[[1, 2, 3, 4, 5, 6],
          [4, 5, 6, 7, 8, 9],
          [7, 8, 9, 7, 8, 9]]]

    Args:
      x: a tf.Tensor with shape (B, T, D) a.k.a. (N, H, W)
      name: name of the variable scope the ops are created in
      context: a list of context offsets

    Returns:
      spliced tensor with shape (..., D * len(context))
    '''
    with tf.variable_scope(name):
        dyn_shape = tf.shape(x)
        B, T = dyn_shape[0], dyn_shape[1]
        shifted_copies = tf.TensorArray(x.dtype, size=len(context))

        for idx, offset in enumerate(context):
            pad_len = abs(offset)
            shifted_end = T + offset
            if offset < 0:
                # Shift right: repeat the first frame in front.
                kept = x[:, 0:shifted_end, :]
                edge = tf.tile(x[:, 0:1, :], [1, pad_len, 1])
                final = tf.concat((edge, kept), axis=1)
            else:
                # Shift left: repeat the last frame at the end.
                kept = x[:, offset:T, :]
                edge = tf.tile(x[:, -1:, :], [1, pad_len, 1])
                final = tf.concat((kept, edge), axis=1)
            shifted_copies = shifted_copies.write(idx, final)

        # (len(context), B, T, D) -> (B, T, len(context), D) -> (B, T, -1)
        spliced = shifted_copies.stack()
        spliced = tf.transpose(spliced, (1, 2, 0, 3))
        spliced = tf.reshape(spliced, (B, T, -1))
    return spliced
def _make_example(uttids, feats, ilens, targets, olens): features = { 'uttids': uttids, 'inputs': tf.expand_dims(feats, axis=-1) if not isinstance(feats, np.ndarray) else np.expand_dims(feats, axis=-1), 'input_length': ilens, 'targets': targets, 'target_length': olens } labels = { 'ctc': tf.ones(tf.shape(feats)[0]) if not isinstance(feats, np.ndarray) else np.ones(feats.shape[0]) } # dummy data for dummy loss function return features, labels
def call(self, inputs: list, **kwargs) -> typing.Any:
    """
    The computation logic of DynamicPoolingLayer.

    :param inputs: two input tensors, `[x, dpool_index]`. `x` is gathered
        at the positions given by `dpool_index` (prefixed with the batch
        index) and the result is max-pooled.
    :return: the max-pooled tensor.
    """
    self._validate_dpool_size()
    x, dpool_index = inputs

    # Build a batch-index tensor of shape (batch, msize1, msize2, 1) so it
    # can be concatenated in front of the dpool indices.
    dpool_shape = tf.shape(dpool_index)
    batch_ids = tf.range(dpool_shape[0])
    batch_index_one = tf.expand_dims(
        tf.expand_dims(batch_ids, axis=-1), axis=-1)
    batch_index = tf.expand_dims(
        tf.tile(batch_index_one, [1, self._msize1, self._msize2]), axis=-1)

    gather_index = tf.concat([batch_index, dpool_index], axis=3)
    x_expand = tf.gather_nd(x, gather_index)

    # Window and stride are the same: non-overlapping pooling cells.
    stride1 = self._msize1 // self._psize1
    stride2 = self._msize2 // self._psize2
    return tf.nn.max_pool(x_expand, [1, stride1, stride2, 1],
                          [1, stride1, stride2, 1], "VALID")
def transform_preprocess(labels=None, blank_index=None, num_class=None):
    '''
    Ensure that the value of blank_index is in a reasonable range,
    and transform the DenseTensor labels to a SparseTensor.

    params:
      labels: None, a tf.SparseTensor (returned unchanged), or dense labels
        that are cast to int32; entries equal to 0 are left out of the
        sparse result.
      blank_index: required, must be >= 0.
      num_class: optional; when given, blank_index must be <= num_class - 1.
    return:
      None when labels is None, otherwise a tf.SparseTensor.
    raises:
      ValueError when blank_index is missing or out of range.
    '''
    if blank_index is None or blank_index < 0:
        raise ValueError('blank_index must be greater than or equal to zero')

    if num_class is not None and blank_index > (num_class - 1):
        raise ValueError('blank_index must be less than or equal to num_class - 1')

    if labels is None:
        return None

    if isinstance(labels, tf.SparseTensor):
        return labels

    dense = tf.cast(labels, tf.int32)
    nonzero_idx = tf.where(tf.not_equal(dense, 0))
    nonzero_values = tf.gather_nd(dense, nonzero_idx)
    dense_shape = tf.cast(tf.shape(dense), dtype=tf.int64)
    return tf.SparseTensor(
        indices=nonzero_idx, values=nonzero_values, dense_shape=dense_shape)