def lookup_char_emb(text, c2v_vocab, c2v_emb, dim_c2v_emb):
    # Split each text into terms, then each term into characters.
    str_tensor = tf.string_split(text)
    str_split = tf.sparse_reshape(str_tensor, [-1])
    str_split, text_mask = tf.sparse_fill_empty_rows(str_split, "")
    char_split = tf.string_split(str_split.values, '')
    # Map characters to vocabulary ids.
    char_tensor = tf.SparseTensor(indices=char_split.indices,
                                  values=c2v_vocab.lookup(char_split.values),
                                  dense_shape=char_split.dense_shape)
    # Flatten, fill empty positions with id 0, embed, then zero the filled rows.
    char_tensor_reshape = tf.sparse_reshape(char_tensor, [-1])
    char_tensor, term_mask = tf.sparse_fill_empty_rows(char_tensor_reshape, 0)
    char_vecs = tf.nn.embedding_lookup_sparse(c2v_emb, char_tensor, None, combiner='sum')
    char_vecs = tf.where(~term_mask, char_vecs, tf.zeros_like(char_vecs))
    # Reshape to [batch, max_terms, max_chars, emb_dim] and derive masks and lengths.
    term_char_vecs = tf.reshape(
        char_vecs,
        shape=tf.stack([tf.shape(text)[0],
                        tf.cast(tf.reduce_max(str_tensor.indices[:, 1]) + 1, tf.int32),
                        -1,
                        tf.shape(char_vecs)[-1]]))
    term_char_mask_tmp = tf.reduce_sum(term_char_vecs, axis=-1)
    term_char_mask = ~tf.equal(term_char_mask_tmp, 0)
    term_char_len = tf.cast(tf.count_nonzero(term_char_mask, axis=-1), tf.int32)
    text_mask = ~tf.equal(tf.reduce_sum(term_char_mask_tmp, axis=-1), 0)
    text_len = tf.cast(tf.count_nonzero(text_mask, axis=-1), tf.int32)
    return term_char_vecs, term_char_mask, term_char_len, text_mask, text_len
def xletter_feature_extractor(text, model_prefix, input_mode, op_dict=None,
                              xletter_cnt=None, win_size=None, dim_xletter_emb=None):
    with tf.variable_scope("xletter_layer", reuse=tf.AUTO_REUSE):
        if input_mode == 'mstf':
            xletter_emb = tf.get_variable(name='xletter_emb_' + model_prefix,
                                          shape=[xletter_cnt * win_size, dim_xletter_emb])
            indices, ids, values, offsets = mstf.dssm_xletter(input=text, win_size=win_size,
                                                              dict_handle=op_dict)
            # Recover a per-xletter batch id from the per-term offsets.
            offsets_to_dense = tf.segment_sum(tf.ones_like(offsets), offsets)
            batch_id = tf.cumsum(offsets_to_dense[:-1])
            index_tensor = tf.concat([tf.expand_dims(batch_id, axis=-1),
                                      tf.expand_dims(indices, axis=-1)], axis=-1)
            value_tensor = ids
            dense_shape = tf.concat([tf.shape(offsets),
                                     tf.expand_dims(tf.reduce_max(indices) + 1, axis=-1)], axis=0)
            text_tensor = tf.SparseTensor(indices=tf.cast(index_tensor, tf.int64),
                                          values=value_tensor,
                                          dense_shape=tf.cast(dense_shape, tf.int64))
            # Flatten, fill empty positions with id 0, embed, then zero the filled rows.
            text_tensor = tf.sparse_reshape(text_tensor, [-1])
            text_tensor, text_mask = tf.sparse_fill_empty_rows(text_tensor, 0)
            text_vecs = tf.nn.embedding_lookup_sparse(xletter_emb, text_tensor, None, combiner='sum')
            text_vecs = tf.where(~text_mask, text_vecs, tf.zeros_like(text_vecs))
            text_vecs = tf.reshape(text_vecs, [-1, tf.reduce_max(indices) + 1, dim_xletter_emb])
            step_mask = ~tf.equal(tf.reduce_sum(text_vecs, axis=2), 0)
            sequence_length = tf.cast(tf.count_nonzero(step_mask, axis=1), tf.int32)
        elif input_mode == 'pyfunc':
            query_split = tf.string_split(text, ';')
            term_split = tf.string_split(query_split.values, ',')
            xletter_tensor_indices = tf.transpose(
                tf.stack([tf.gather(query_split.indices[:, 0], term_split.indices[:, 0]),
                          tf.gather(query_split.indices[:, 1], term_split.indices[:, 0])]))
            xletter_tensor = tf.SparseTensor(indices=xletter_tensor_indices,
                                             values=tf.string_to_number(term_split.values, out_type=tf.int32),
                                             dense_shape=query_split.dense_shape)
            xletter_emb = tf.get_variable(name='xletter_emb_' + model_prefix,
                                          shape=[xletter_cnt * win_size, dim_xletter_emb])
            xletter_tensor_reshape = tf.sparse_reshape(xletter_tensor, [-1])
            xletter_tensor, text_mask = tf.sparse_fill_empty_rows(xletter_tensor_reshape, 0)
            xletter_vecs = tf.nn.embedding_lookup_sparse(xletter_emb, xletter_tensor, None, combiner='sum')
            xletter_vecs = tf.where(~text_mask, xletter_vecs, tf.zeros_like(xletter_vecs))
            text_vecs = tf.reshape(xletter_vecs,
                                   shape=tf.stack([-1, tf.reduce_max(query_split.indices[:, 1]) + 1,
                                                   dim_xletter_emb]))
            step_mask = ~tf.equal(tf.reduce_sum(text_vecs, axis=2), 0)
            sequence_length = tf.cast(tf.count_nonzero(step_mask, axis=1), tf.int32)
        elif input_mode == 'pyfunc_batch':
            indices, values, dense_shape = tf.py_func(op_dict.batch_xletter_extractor, [text],
                                                      [tf.int64, tf.int32, tf.int64])
            xletter_tensor = tf.SparseTensor(indices=indices, values=values, dense_shape=dense_shape)
            xletter_emb = tf.get_variable(name='xletter_emb_' + model_prefix,
                                          shape=[xletter_cnt * win_size, dim_xletter_emb])
            xletter_tensor_reshape = tf.sparse_reshape(xletter_tensor, [-1])
            xletter_tensor, text_mask = tf.sparse_fill_empty_rows(xletter_tensor_reshape, 0)
            xletter_vecs = tf.nn.embedding_lookup_sparse(xletter_emb, xletter_tensor, None, combiner='sum')
            xletter_vecs = tf.where(~text_mask, xletter_vecs, tf.zeros_like(xletter_vecs))
            text_vecs = tf.reshape(xletter_vecs, shape=tf.stack([-1, dense_shape[1], dim_xletter_emb]))
            step_mask = ~tf.equal(tf.reduce_sum(text_vecs, axis=2), 0)
            sequence_length = tf.cast(tf.count_nonzero(step_mask, axis=1), tf.int32)
        else:
            raise NotImplementedError
    return text_vecs, step_mask, sequence_length
def lookup_emb(text_tensor, text_padding, embedding_weight, dim_output):
    # Flatten the [batch, steps] sparse id tensor, fill empty positions with id 0,
    # look up and sum the embeddings, then zero out the vectors of filled positions.
    text_tensor = tf.sparse_reshape(text_tensor, [-1])
    text_tensor, text_mask = tf.sparse_fill_empty_rows(text_tensor, 0)
    text_vecs = tf.nn.embedding_lookup_sparse(embedding_weight, text_tensor, None, combiner='sum')
    text_vecs = tf.where(~text_mask, text_vecs, tf.zeros_like(text_vecs))
    text_vecs = tf.reshape(text_vecs, shape=tf.stack([-1, text_padding, dim_output]))
    step_mask = ~tf.equal(tf.reduce_sum(text_vecs, axis=2), 0)
    sequence_length = tf.cast(tf.count_nonzero(step_mask, axis=1), tf.int32)
    return text_vecs, step_mask, sequence_length
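# --- Hypothetical usage sketch for lookup_emb above (TF 1.x), not part of the
# original code: a batch of two texts padded to 3 term positions, with term ids
# given as a SparseTensor. The vocabulary size (100) and embedding size (8) are
# illustrative assumptions; tensorflow is assumed imported as tf.
def demo_lookup_emb():
    term_ids = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                               values=[4, 17, 9],      # remaining positions are padding
                               dense_shape=[2, 3])
    emb = tf.get_variable("demo_term_emb", shape=[100, 8])
    vecs, mask, lengths = lookup_emb(term_ids, text_padding=3,
                                     embedding_weight=emb, dim_output=8)
    # vecs: [2, 3, 8] floats, mask: [2, 3] bools, lengths: [2] ints (here [2, 1])
    return vecs, mask, lengths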
def sparse_transform(ids, values, weight_shape):
    assert len(weight_shape) == 2
    with tf.device('/cpu:0'):
        weights = []
        # Change the number of weight shards here if needed.
        num_shards = 1
        assert weight_shape[0] % num_shards == 0
        for i in range(0, num_shards):
            weight_i = tf.get_variable(
                "weight_%02d" % i,
                [weight_shape[0] // num_shards] + weight_shape[1:],
                trainable=True,
                initializer=tf.truncated_normal_initializer(stddev=0.1))
            weights.append(weight_i)
        # Fill empty rows so every example gets an embedding, then combine.
        ids, _ = tf.sparse_fill_empty_rows(ids, 0)
        values, _ = tf.sparse_fill_empty_rows(values, 0.0)
        return tf.nn.embedding_lookup_sparse(
            weights, ids, values, partition_strategy='div', combiner='sum')
def test_sparse():
    """
    Test SparseTensor operations.
    :return:
    """
    # Position indices
    idx = [[0, 0, 0], [0, 1, 0], [1, 0, 3], [1, 1, 2], [1, 1, 3], [1, 2, 1]]
    # Tensor values
    val = [0, 10, 103, 112, 113, 114]
    # Tensor shape
    shape = [2, 3, 4]
    # Create the sparse tensor
    sp = tf.SparseTensor(idx, val, shape)
    # Convert the SparseTensor into a dense boolean indicator tensor
    si = tf.sparse_to_indicator(sp, 200)
    si_val = si[1, 1, 113]
    test_run_sess("sparse indicator", si)
    test_run_sess("sparse indicator value", si_val)
    # Concatenate sparse tensors
    sp1 = tf.SparseTensor([[0, 2], [1, 0], [1, 1]], ['a', 'b', 'c'], [2, 3])
    sp2 = tf.SparseTensor([[0, 1], [0, 2]], ['d', 'e'], [2, 4])
    sp3 = tf.SparseTensor([[0, 1], [0, 2]], ['d', 'e'], [2, 3])
    con1 = tf.sparse_concat(1, [sp1, sp2], name=None)
    con2 = tf.sparse_concat(0, [sp1, sp3], name=None)
    test_run_sess("sparse concat1", con1)
    test_run_sess("sparse concat2", con2)
    # Reorder a sparse tensor into canonical row-major order
    sp4 = tf.SparseTensor([[0, 3], [0, 1], [3, 1], [2, 0]], ['b', 'a', 'd', 'c'], [4, 5])
    rsp4 = tf.sparse_reorder(sp4)
    # Retain only some of the elements
    to_retain = [True, False, False, True]
    rsp5 = tf.sparse_retain(sp4, to_retain)
    # Fill the empty rows
    rsp6 = tf.sparse_fill_empty_rows(sp4, 'zz')
    test_run_sess("rsp4", rsp4)
    test_run_sess("rsp5", rsp5)
    test_run_sess("rsp6", rsp6)
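# --- Minimal standalone sketch (TF 1.x, added for illustration) of what
# tf.sparse_fill_empty_rows returns: row 1 of the 3 x 4 sparse tensor has no
# entries, so it receives the default value and is flagged in the indicator.
def demo_fill_empty_rows():
    sp = tf.SparseTensor(indices=[[0, 0], [2, 1]], values=[1, 2], dense_shape=[3, 4])
    filled, empty_row_indicator = tf.sparse_fill_empty_rows(sp, default_value=-1)
    with tf.Session() as sess:
        dense, mask = sess.run([tf.sparse_tensor_to_dense(filled), empty_row_indicator])
        print(dense)  # [[ 1  0  0  0] [-1  0  0  0] [ 0  2  0  0]]
        print(mask)   # [False  True False]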
def module_fn_with_preprocessing():
    """Spec function for a full-text embedding module with preprocessing."""
    sentences = tf.placeholder(shape=[None], dtype=tf.string, name="sentences")
    # Perform a minimalistic text preprocessing by removing punctuation and
    # splitting on spaces.
    normalized_sentences = tf.regex_replace(input=sentences, pattern=r"\pP", rewrite="")
    tokens = tf.string_split(normalized_sentences, " ")

    embeddings_var = tf.get_variable(
        initializer=tf.zeros([vocab_size + num_oov_buckets, embeddings_dim]),
        name=EMBEDDINGS_VAR_NAME,
        dtype=tf.float32)

    table_initializer = tf.lookup.TextFileInitializer(
        vocabulary_file, tf.string, tf.lookup.TextFileIndex.WHOLE_LINE,
        tf.int64, tf.lookup.TextFileIndex.LINE_NUMBER)
    lookup_table = tf.lookup.StaticVocabularyTable(
        table_initializer, num_oov_buckets=num_oov_buckets)
    sparse_ids = tf.SparseTensor(indices=tokens.indices,
                                 values=lookup_table.lookup(tokens.values),
                                 dense_shape=tokens.dense_shape)

    # In case some of the input sentences are empty before or after
    # normalization, we will end up with empty rows. We do however want to
    # return embedding for every row, so we have to fill in the empty rows with
    # a default.
    sparse_ids, _ = tf.sparse_fill_empty_rows(
        sparse_ids, lookup_table.lookup(tf.constant("")))
    # In case all of the input sentences are empty before or after
    # normalization, we will end up with a SparseTensor with shape [?, 0]. After
    # filling in the empty rows we must ensure the shape is set properly to
    # [?, 1]. At this point, there are no empty rows, so the new shape will be
    # [sparse_ids.dense_shape[0], max(1, sparse_ids.dense_shape[1])].
    sparse_ids = tf.sparse_reset_shape(sparse_ids)

    combined_embedding = tf.nn.embedding_lookup_sparse(params=embeddings_var,
                                                       sp_ids=sparse_ids,
                                                       sp_weights=None,
                                                       combiner="sqrtn")

    hub.add_signature("default", {"sentences": sentences},
                      {"default": combined_embedding})
def TextExtract(text, win_size, dict_handle, weight, dim_input, dim_output, max_term_count=12):
    indices, ids, values, offsets = mstf.dssm_xletter(
        input=text,
        win_size=win_size,
        dict_handle=dict_handle,
        max_term_count=max_term_count)
    offsets_to_dense = tf.segment_sum(tf.ones_like(offsets), offsets)
    batch_id = tf.cumsum(offsets_to_dense[:-1])  # cumulative sum of the dense offsets
    index_tensor = tf.concat(
        [tf.expand_dims(batch_id, axis=-1), tf.expand_dims(indices, axis=-1)],
        axis=-1)
    value_tensor = ids
    dense_shape = tf.concat([
        tf.shape(offsets),
        tf.expand_dims(tf.reduce_max(indices) + 1, axis=-1)
    ], axis=0)
    text_tensor = tf.SparseTensor(indices=tf.cast(index_tensor, tf.int64),
                                  values=value_tensor,
                                  dense_shape=tf.cast(dense_shape, tf.int64))
    text_padding = tf.reduce_max(indices) + 1
    text_tensor = tf.sparse_reshape(text_tensor, [-1])
    text_tensor, text_mask = tf.sparse_fill_empty_rows(text_tensor, dim_input - 1)
    text_vecs = tf.nn.embedding_lookup_sparse(weight, text_tensor, None, combiner='sum')
    # Zero out the vectors that came from filled (originally empty) positions.
    text_vecs = tf.transpose(
        tf.multiply(tf.transpose(text_vecs), 1 - tf.cast(text_mask, dtype=tf.float32)))
    text_vecs = tf.reshape(text_vecs, [-1, text_padding, dim_output])
    # Build an additive attention mask: -inf for padded steps, 0 elsewhere.
    step_mask = tf.equal(tf.reduce_sum(text_vecs, axis=2), 0)
    step_mask = tf.where(step_mask,
                         -math.inf * tf.ones_like(step_mask, dtype=tf.float32),
                         tf.zeros_like(step_mask, dtype=tf.float32))
    return text_vecs, text_padding, step_mask
def module_fn_with_preprocessing():
    # A full-text input module with built-in preprocessing.
    sentences = tf.placeholder(shape=[None], dtype=tf.string, name="sentences")
    # Remove punctuation with a regular expression.
    normalized_sentences = tf.regex_replace(input=sentences, pattern=r"\pP", rewrite="")
    # Split on spaces to get a sparse tensor of tokens.
    tokens = tf.string_split(normalized_sentences, " ")

    embeddings_var = tf.get_variable(  # define the word-embedding variable
        initializer=tf.zeros([vocab_size + num_oov_buckets, embeddings_dim]),
        name='embedding',
        dtype=tf.float32)

    # Use the vocabulary file to map tokens to ids.
    lookup_table = tf.contrib.lookup.index_table_from_file(
        vocabulary_file=vocabulary_file, num_oov_buckets=num_oov_buckets)
    # Turn the token sparse tensor into a sparse tensor of ids for the embedding.
    sparse_ids = tf.SparseTensor(indices=tokens.indices,
                                 values=lookup_table.lookup(tokens.values),
                                 dense_shape=tokens.dense_shape)

    # Fill the empty rows of the sparse tensor with a default id.
    sparse_ids, _ = tf.sparse_fill_empty_rows(
        sparse_ids, lookup_table.lookup(tf.constant("")))
    # sparse_ids = tf.sparse_reset_shape(sparse_ids)

    # Combine the looked-up vectors with the "sqrtn" combiner
    # (sum divided by the square root of the weight norm).
    combined_embedding = tf.nn.embedding_lookup_sparse(
        params=embeddings_var,
        sp_ids=sparse_ids,
        sp_weights=None,
        combiner="sqrtn")

    # The "default" signature is used unless another name is given; if a
    # different name is specified here, the caller must refer to it explicitly.
    # Inputs and outputs are given as dicts and may contain multiple tensors.
    hub.add_signature("default", {"sentences": sentences},
                      {"default": combined_embedding})
def module_fn_with_preprocessing():
    """Spec function for a full-text embedding module with preprocessing."""
    sentences = tf.placeholder(shape=[None], dtype=tf.string, name="sentences")
    # Perform a minimalistic text preprocessing by removing punctuation and
    # splitting on spaces.
    normalized_sentences = tf.regex_replace(
        input=sentences, pattern=r"\pP", rewrite="")
    tokens = tf.string_split(normalized_sentences, " ")

    # In case some of the input sentences are empty before or after
    # normalization, we will end up with empty rows. We do however want to
    # return embedding for every row, so we have to fill in the empty rows with
    # a default.
    tokens, _ = tf.sparse_fill_empty_rows(tokens, "")
    # In case all of the input sentences are empty before or after
    # normalization, we will end up with a SparseTensor with shape [?, 0]. After
    # filling in the empty rows we must ensure the shape is set properly to
    # [?, 1].
    tokens = tf.sparse_reset_shape(tokens)

    embeddings_var = tf.get_variable(
        initializer=tf.zeros([vocab_size + num_oov_buckets, embeddings_dim]),
        name=EMBEDDINGS_VAR_NAME,
        dtype=tf.float32)

    lookup_table = tf.contrib.lookup.index_table_from_file(
        vocabulary_file=vocabulary_file,
        num_oov_buckets=num_oov_buckets,
    )
    sparse_ids = tf.SparseTensor(
        indices=tokens.indices,
        values=lookup_table.lookup(tokens.values),
        dense_shape=tokens.dense_shape)

    combined_embedding = tf.nn.embedding_lookup_sparse(
        params=embeddings_var,
        sp_ids=sparse_ids,
        sp_weights=None,
        combiner="sqrtn")

    hub.add_signature("default", {"sentences": sentences},
                      {"default": combined_embedding})
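# --- A hedged sketch (not part of the original example) of exporting the
# module_fn above with TF Hub. vocab_size, num_oov_buckets, embeddings_dim and
# vocabulary_file are assumed to be defined in the enclosing scope, and the
# export path is illustrative.
def export_module_demo(export_path="/tmp/text_embedding_module"):
    spec = hub.create_module_spec(module_fn_with_preprocessing)
    with tf.Graph().as_default():
        module = hub.Module(spec)
        # Empty sentences are handled thanks to tf.sparse_fill_empty_rows above.
        embeddings = module(["hello world", ""])
        with tf.Session() as sess:
            sess.run([tf.global_variables_initializer(), tf.tables_initializer()])
            print(sess.run(embeddings).shape)
            module.export(export_path, sess)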
def _crnn_model_fn(features, labels, mode, params=None, config=None):
    if isinstance(features, dict):
        features = features['images']
    max_width = params['max_width']
    global_step = tf.train.get_or_create_global_step()
    logging.info("Features {}".format(features.shape))
    features = tf.reshape(features, [params['batch_size'], 32, max_width, 3])
    images = tf.transpose(features, [0, 2, 1, 3])
    logging.info("Images {}".format(images.shape))

    if (mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL):
        labels = tf.reshape(labels, [params['batch_size'], -1])
        tf.summary.image('image', features)
        idx = tf.where(tf.not_equal(labels, 0))
        sparse_labels = tf.SparseTensor(
            idx, tf.gather_nd(labels, idx),
            [params['batch_size'], params['max_target_seq_length']])
        sparse_labels, _ = tf.sparse_fill_empty_rows(sparse_labels,
                                                     params['num_labels'] - 1)

    # 64 / 3 x 3 / 1 / 1
    conv1 = tf.layers.conv2d(inputs=images, filters=64, kernel_size=(3, 3),
                             padding="same", activation=tf.nn.relu)
    logging.info("conv1 {}".format(conv1.shape))
    # 2 x 2 / 1
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
    logging.info("pool1 {}".format(pool1.shape))
    # 128 / 3 x 3 / 1 / 1
    conv2 = tf.layers.conv2d(inputs=pool1, filters=128, kernel_size=(3, 3),
                             padding="same", activation=tf.nn.relu)
    logging.info("conv2 {}".format(conv2.shape))
    # 2 x 2 / 1
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
    logging.info("pool2 {}".format(pool2.shape))
    # 256 / 3 x 3 / 1 / 1
    conv3 = tf.layers.conv2d(inputs=pool2, filters=256, kernel_size=(3, 3),
                             padding="same", activation=tf.nn.relu)
    logging.info("conv3 {}".format(conv3.shape))
    # Batch normalization layer
    bnorm1 = tf.layers.batch_normalization(conv3)
    # 256 / 3 x 3 / 1 / 1
    conv4 = tf.layers.conv2d(inputs=bnorm1, filters=256, kernel_size=(3, 3),
                             padding="same", activation=tf.nn.relu)
    logging.info("conv4 {}".format(conv4.shape))
    # 1 x 2 / 1
    pool3 = tf.layers.max_pooling2d(inputs=conv4, pool_size=[2, 2],
                                    strides=[1, 2], padding="same")
    logging.info("pool3 {}".format(pool3.shape))
    # 512 / 3 x 3 / 1 / 1
    conv5 = tf.layers.conv2d(inputs=pool3, filters=512, kernel_size=(3, 3),
                             padding="same", activation=tf.nn.relu)
    logging.info("conv5 {}".format(conv5.shape))
    # Batch normalization layer
    bnorm2 = tf.layers.batch_normalization(conv5)
    # 512 / 3 x 3 / 1 / 1
    conv6 = tf.layers.conv2d(inputs=bnorm2, filters=512, kernel_size=(3, 3),
                             padding="same", activation=tf.nn.relu)
    logging.info("conv6 {}".format(conv6.shape))
    # 1 x 2 / 2
    pool4 = tf.layers.max_pooling2d(inputs=conv6, pool_size=[2, 2],
                                    strides=[1, 2], padding="same")
    logging.info("pool4 {}".format(pool4.shape))
    # 512 / 2 x 2 / 1 / 0
    conv7 = tf.layers.conv2d(inputs=pool4, filters=512, kernel_size=(2, 2),
                             padding="valid", activation=tf.nn.relu)
    logging.info("conv7 {}".format(conv7.shape))

    reshaped_cnn_output = tf.reshape(conv7, [params['batch_size'], -1, 512])
    rnn_inputs = tf.transpose(reshaped_cnn_output, perm=[1, 0, 2])
    max_char_count = rnn_inputs.get_shape().as_list()[0]
    logging.info("max_char_count {}".format(max_char_count))
    input_lengths = tf.zeros([params['batch_size']], dtype=tf.int32) + max_char_count
    logging.info("input_lengths {}".format(input_lengths.shape))

    if params['rnn_type'] == 'CudnnLSTM':
        rnn_output, rnn_state, new_states = _cudnn_lstm(mode, params, rnn_inputs)
    elif params['rnn_type'] == 'CudnnCompatibleLSTM':
        rnn_output, rnn_state, new_states = _cudnn_lstm_compatible(params, rnn_inputs)
    else:
        rnn_output, rnn_state, new_states = _basic_lstm(mode, params, rnn_inputs)

    with tf.variable_scope('Output_layer'):
        logits = tf.layers.dense(
            rnn_output, params['num_labels'],
            kernel_initializer=tf.contrib.layers.xavier_initializer())

    if params['beam_search_decoder']:
        decoded, _log_prob = tf.nn.ctc_beam_search_decoder(logits, input_lengths)
    else:
        decoded, _log_prob = tf.nn.ctc_greedy_decoder(logits, input_lengths)
    prediction = tf.to_int32(decoded[0])

    metrics = {}
    if (mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL):
        levenshtein = tf.edit_distance(prediction, sparse_labels, normalize=True)
        errors_rate = tf.metrics.mean(levenshtein)
        mean_error_rate = tf.reduce_mean(levenshtein)
        metrics['Error_Rate'] = errors_rate
        if mode == tf.estimator.ModeKeys.TRAIN:
            tf.summary.scalar('Error_Rate', mean_error_rate)
        with tf.name_scope('CTC'):
            ctc_loss = tf.nn.ctc_loss(sparse_labels, logits, input_lengths,
                                      ignore_longer_outputs_than_inputs=True)
            mean_loss = tf.reduce_mean(tf.truediv(ctc_loss, tf.to_float(input_lengths)))
            loss = mean_loss
    else:
        loss = None

    training_hooks = []
    if mode == tf.estimator.ModeKeys.TRAIN:
        opt = tf.train.AdamOptimizer(params['learning_rate'])
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            if params['grad_clip'] is None:
                train_op = opt.minimize(loss, global_step=global_step)
            else:
                gradients, variables = zip(*opt.compute_gradients(loss))
                gradients, _ = tf.clip_by_global_norm(gradients, params['grad_clip'])
                train_op = opt.apply_gradients(
                    [(gradients[i], v) for i, v in enumerate(variables)],
                    global_step=global_step)
    elif mode == tf.estimator.ModeKeys.EVAL:
        train_op = None
    else:
        train_op = None

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = tf.sparse_to_dense(tf.to_int32(prediction.indices),
                                         tf.to_int32(prediction.dense_shape),
                                         tf.to_int32(prediction.values),
                                         default_value=-1,
                                         name="output")
        export_outputs = {
            tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                tf.estimator.export.PredictOutput(predictions)
        }
    else:
        predictions = None
        export_outputs = None

    return tf.estimator.EstimatorSpec(mode=mode,
                                      eval_metric_ops=metrics,
                                      predictions=predictions,
                                      loss=loss,
                                      training_hooks=training_hooks,
                                      export_outputs=export_outputs,
                                      train_op=train_op)
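# --- A hedged sketch of wiring _crnn_model_fn into a tf.estimator.Estimator.
# Every value in params below is an illustrative assumption, not taken from the
# original code.
def make_crnn_estimator(model_dir="/tmp/crnn_demo"):
    params = {'batch_size': 32, 'max_width': 400, 'max_target_seq_length': 25,
              'num_labels': 37, 'rnn_type': 'CudnnLSTM', 'learning_rate': 1e-4,
              'grad_clip': 5.0, 'beam_search_decoder': False}
    return tf.estimator.Estimator(model_fn=_crnn_model_fn, params=params,
                                  model_dir=model_dir)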
def create_training_rnn(self, input_keep_prob, output_keep_prob, grad_clip,
                        learning_rate, lr_decay_factor, use_iterator=False):
    """
    Create the training RNN

    Parameters
    ----------
    :param input_keep_prob: probability of keeping the input signal of a cell during training
    :param output_keep_prob: probability of keeping the output signal of a cell during training
    :param grad_clip: max gradient size (prevents exploding gradients)
    :param learning_rate: learning rate parameter fed to the optimizer
    :param lr_decay_factor: decay factor of the learning rate
    :param use_iterator: if True, plug an iterator.get_next() operation into the input of the
                         model; otherwise placeholders are created instead
    """
    if self.rnn_created:
        logging.fatal("Trying to create the acoustic RNN but it has already been created.")

    # Store model parameters
    self.input_keep_prob = input_keep_prob
    self.output_keep_prob = output_keep_prob

    if use_iterator is True:
        mfcc_batch, input_lengths, label_batch = self.iterator_get_next_op
        # Pad if the batch is not complete
        padded_mfcc_batch = tf.pad(mfcc_batch,
                                   [[0, self.batch_size - tf.size(input_lengths)], [0, 0], [0, 0]])
        # Transpose padded_mfcc_batch in order to get the time series as the first dimension
        # [batch_size, time_serie, input_dim] ====> [time_serie, batch_size, input_dim]
        inputs = tf.transpose(padded_mfcc_batch, perm=[1, 0, 2])
        # Pad input_seq_lengths if the batch is not complete
        input_seq_lengths = tf.pad(input_lengths, [[0, self.batch_size - tf.size(input_lengths)]])
        # The label tensor must be provided as a sparse tensor.
        idx = tf.where(tf.not_equal(label_batch, 0))
        sparse_labels = tf.SparseTensor(idx, tf.gather_nd(label_batch, idx),
                                        [self.batch_size, self.max_target_seq_length])
        # Pad sparse_labels if the batch is not complete
        sparse_labels, _ = tf.sparse_fill_empty_rows(sparse_labels, self.num_labels - 1)
    else:
        # Set placeholders for input
        self.inputs_ph = tf.placeholder(tf.float32,
                                        shape=[self.max_input_seq_length, None, self.input_dim],
                                        name="inputs_ph")
        self.input_seq_lengths_ph = tf.placeholder(tf.int32, shape=[None],
                                                   name="input_seq_lengths_ph")
        self.labels_ph = tf.placeholder(tf.int32, shape=[None, self.max_target_seq_length],
                                        name="labels_ph")
        inputs = self.inputs_ph
        input_seq_lengths = self.input_seq_lengths_ph
        label_batch = self.labels_ph
        # The label tensor must be provided as a sparse tensor.
        # First get the indexes of the non-zero positions
        idx = tf.where(tf.not_equal(label_batch, 0))
        # Then build a sparse tensor from those indexes
        sparse_labels = tf.SparseTensor(idx, tf.gather_nd(label_batch, idx),
                                        [self.batch_size, self.max_target_seq_length])

    self.global_step, logits, prediction, self.rnn_keep_state_op, self.rnn_state_zero_op, self.input_keep_prob_ph,\
        self.output_keep_prob_ph, self.rnn_tuple_state = self._build_base_rnn(inputs, input_seq_lengths, False)

    # Add the training part to the network
    self.learning_rate_var = self._add_training_on_rnn(logits, grad_clip, learning_rate,
                                                       lr_decay_factor, sparse_labels,
                                                       input_seq_lengths, prediction)

    # Add the saving and restore operations
    self.saver_op = self._add_saving_op()
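# --- Minimal sketch (TF 1.x, illustrative values only) of the label handling
# used above: dense, zero-padded label ids become a SparseTensor, and any row
# that is entirely padding is filled with the blank id (num_labels - 1) so that
# tf.nn.ctc_loss and tf.edit_distance still see an entry for every batch element.
def demo_sparse_labels(num_labels=5):
    label_batch = tf.constant([[1, 2, 0], [0, 0, 0]])   # second example is all padding
    idx = tf.where(tf.not_equal(label_batch, 0))
    sparse_labels = tf.SparseTensor(idx, tf.gather_nd(label_batch, idx), [2, 3])
    sparse_labels, _ = tf.sparse_fill_empty_rows(sparse_labels, num_labels - 1)
    return sparse_labels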