def pooling_layer(self, x, pooling_type=None):
    '''Pool frame-level features over the time axis of a whole utterance.

    Args:
      x: rank-3 tensor laid out as [B, T, D]; axis 1 (T) is reduced.
      pooling_type: 'stats' or 'average'. A falsy value falls back to
        self.netconf['frame_pooling_type'].

    Returns:
      A rank-2 tensor: mean concatenated with standard deviation
      ([B, D * 2]) for 'stats', or the mean ([B, D]) for 'average'.

    Raises:
      ValueError: if the resolved pooling type is neither of the above.
    '''
    rank3_check = tf.debugging.assert_rank(x, 3)
    with tf.control_dependencies([rank3_check]):
        x = tf.identity(x)

    pooling_type = pooling_type or self.netconf['frame_pooling_type']
    if pooling_type not in ('stats', 'average'):
        raise ValueError('Unsupported frame_pooling_type: %s' % (pooling_type))

    if pooling_type == 'stats':
        with tf.name_scope('stats_pooling'):
            mean, var = tf.nn.moments(x, 1)
            # The small constant keeps sqrt away from an exact zero variance.
            x = tf.concat([mean, tf.sqrt(var + 1e-6)], 1)
    else:
        with tf.name_scope('average_pooling'):
            x, _ = tf.nn.moments(x, 1)

    rank2_check = tf.debugging.assert_rank(x, 2)
    with tf.control_dependencies([rank2_check]):
        x = tf.identity(x)
    return x
def build_export_output(self, model):  # pylint: disable=no-self-use
    """Attach the intent and slot export tensors to `model`.

    Sets `model.preds` and `model.score` to (intent, slots) pairs and fills
    `model.output_dict` with the four named output tensors.

    Args:
      model: object exposing `logits` (an (intent_logits, slots_logits)
        pair), `transitions` and `input_x_len`.
    """
    crf_transitions = model.transitions
    intent_logits, slots_logits = model.logits

    intent_score = tf.nn.softmax(intent_logits, name="intent_score")
    intent_preds = tf.argmax(intent_logits, axis=-1, name="intent_preds")

    decoded_tags, decoded_score = crf_decode(slots_logits, crf_transitions,
                                             model.input_x_len)
    # Identity ops give the decoded tensors stable names.
    slots_preds = tf.identity(decoded_tags, name="slots_preds")
    slots_score = tf.identity(decoded_score, name="slots_score")

    model.preds = (intent_preds, slots_preds)
    model.score = (intent_score, slots_score)
    model.output_dict = dict(
        slots_score=slots_score,
        slots_preds=slots_preds,
        intent_score=intent_score,
        intent_preds=intent_preds)
    logging.info("Model built.")
def transfer_bert_model(bert_model_dir, output_bert_model):
    """Rebuild a BERT graph with named outputs and re-save its checkpoint.

    A `modeling.BertModel` is built in inference mode from
    `bert_config.json` in `bert_model_dir`. Every encoder layer is wrapped in
    an identity op named "encoder_layers_<idx>" and the pooled output in one
    named "input_x_bert_cls". Variables are then restored from
    `bert_model.ckpt` in the same directory and saved to `output_bert_model`.

    Args:
      bert_model_dir: directory holding `bert_config.json` and the
        `bert_model.ckpt` checkpoint.
      output_bert_model: checkpoint path passed to `Saver.save`.
    """
    config_path = os.path.join(bert_model_dir, 'bert_config.json')
    checkpoint_path = os.path.join(bert_model_dir, 'bert_model.ckpt')

    graph = tf.Graph()
    with graph.as_default():
        input_ids = tf.placeholder(tf.int32, (None, None), 'input_ids')
        input_mask = tf.placeholder(tf.int32, (None, None), 'input_mask')
        segment_ids = tf.placeholder(tf.int32, (None, None), 'segment_ids')

        bert_config = modeling.BertConfig.from_json_file(config_path)
        model = modeling.BertModel(
            config=bert_config,
            is_training=False,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=segment_ids,
            use_one_hot_embeddings=False)

        all_encoder_layers = model.get_all_encoder_layers()
        input_x_bert_cls = model.get_pooled_output()
        for idx, layer in enumerate(all_encoder_layers):
            layer = tf.identity(layer, "encoder_layers_" + str(idx))
            print("layer:", layer)
        input_x_bert_cls = tf.identity(input_x_bert_cls, "input_x_bert_cls")
        print("input_x_bert_cls", input_x_bert_cls)

        saver = tf.train.Saver()
        # One session is enough: graph construction does not need one, and
        # the session is only used to restore and re-save the variables.
        with tf.Session(graph=graph) as sess:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, checkpoint_path)
            saver.save(sess, output_bert_model)
def curvature_range(self):
    """Build the ops that track the smoothed curvature range.

    A window variable of width `self._curv_win_width` is updated with the
    current squared gradient norm. The min and max over the valid part of
    the window are optionally log-transformed, fed to the moving averager,
    and the averaged values are stored in `self._h_min` / `self._h_max`.

    Returns:
      A one-element list containing the moving-average update op.
    """
    # Window variable; slot (global_step % width) receives the newest value.
    self._curv_win = tf.Variable(
        np.zeros([self._curv_win_width]),
        dtype=tf.float32,
        name="curv_win",
        trainable=False)
    self._curv_win = tf.scatter_update(
        self._curv_win,
        self._global_step % self._curv_win_width,
        self._grad_norm_squared + EPS)

    # Iterations start from 0, so only the first (global_step + 1) slots
    # are meaningful until the window has been filled once.
    window_begin = tf.constant([0])
    window_size = tf.expand_dims(
        tf.minimum(tf.constant(self._curv_win_width), self._global_step + 1),
        axis=0)
    valid_window = tf.slice(self._curv_win, window_begin, window_size)

    if self._h_min_log_smooth:
        self._h_min_t = tf.log(tf.reduce_min(valid_window) + EPS)
    else:
        self._h_min_t = tf.reduce_min(valid_window)
    if self._h_max_log_smooth:
        self._h_max_t = tf.log(tf.reduce_max(valid_window) + EPS)
    else:
        self._h_max_t = tf.reduce_max(valid_window)

    curv_range_ops = []
    with tf.control_dependencies([self._h_min_t, self._h_max_t]):
        avg_op = self._moving_averager.apply([self._h_min_t, self._h_max_t])
        # Read the averages only after the averager has been updated.
        with tf.control_dependencies([avg_op]):
            if self._h_min_log_smooth:
                # Undo the log transform applied before averaging.
                self._h_min = tf.exp(
                    tf.identity(self._moving_averager.average(self._h_min_t)))
            else:
                self._h_min = tf.identity(
                    self._moving_averager.average(self._h_min_t))
            if self._h_max_log_smooth:
                self._h_max = tf.exp(
                    tf.identity(self._moving_averager.average(self._h_max_t)))
            else:
                self._h_max = tf.identity(
                    self._moving_averager.average(self._h_max_t))
            if self._sparsity_debias:
                self._h_min = self._h_min * self._sparsity_avg
                self._h_max = self._h_max * self._sparsity_avg
    curv_range_ops.append(avg_op)
    return curv_range_ops
def _freq_feat_graph(feat_name, **kwargs):
    """Build the graph that extracts a frequency feature from a wav path.

    Args:
      feat_name: 'fbank' or 'spec' (magnitude spectrum).
      **kwargs: 'winlen', 'winstep', 'feature_size' and 'sr' are read and
        forwarded to `speech_ops.speech_params`. Other keys, including
        'nfft', are ignored.

    Returns:
      A pair `(graph, (input_name, output_name))`. `graph` is None when
      `feat_name` is already a key of `_global_sess`; otherwise it is a new
      graph with a scalar string placeholder 'wavpath' and an output tensor
      named `feat_name`.

    Raises:
      ValueError: if `feat_name` is not supported.
    """
    # Validate explicitly: an assert would be stripped under `python -O`.
    if feat_name not in ('fbank', 'spec'):
        raise ValueError(f"Not support freq feat: {feat_name}.")

    params = speech_ops.speech_params(
        sr=kwargs.get('sr'),
        bins=kwargs.get('feature_size'),
        add_delta_deltas=False,
        audio_frame_length=kwargs.get('winlen'),
        audio_frame_step=kwargs.get('winstep'))

    graph = None
    if feat_name not in _global_sess:
        graph = tf.Graph()
        #pylint: disable=not-context-manager
        with graph.as_default():
            filepath = tf.placeholder(dtype=tf.string, shape=[], name='wavpath')
            waveforms, sample_rate = speech_ops.read_wav(filepath, params)
            if feat_name == 'fbank':
                feat = speech_ops.extract_feature(waveforms, params)
            else:
                # output_type: 1, power spec; 2 log power spec
                power_spec = py_x_ops.spectrum(
                    waveforms[:, 0],
                    tf.cast(sample_rate, tf.dtypes.float32),
                    output_type=1)
                # Magnitude spectrum, with a trailing channel axis added.
                feat = tf.expand_dims(tf.sqrt(power_spec), -1)
            # shape must be [T, D, C]
            tf.identity(feat, name=feat_name)

    return graph, (_get_out_tensor_name('wavpath', 0),
                   _get_out_tensor_name(feat_name, 0))
def resnet(self, inputs):
    '''Stack an input conv/bn/prelu stem and the configured residual blocks.

    Reads 'layers_list', 'filters_list', 'strides_list' and 'block_mode'
    from self.netconf. Block i maps filters_list[i] -> filters_list[i + 1]
    channels using strides_list[i].

    Args:
      inputs: input tensor fed to the stem convolution.

    Returns:
      The output tensor of the last residual block.
    '''
    conf = self.netconf
    layers_list = conf['layers_list']
    logging.info("layers_list : {}".format(layers_list))
    filters_list = conf['filters_list']
    logging.info("filters_list : {}".format(filters_list))
    strides_list = conf['strides_list']
    logging.info("strides_list : {}".format(strides_list))
    block_mode = conf['block_mode']
    logging.info("block_mode : {}".format(block_mode))

    with tf.variable_scope('resnet'):
        net = tf.identity(inputs)
        with tf.variable_scope('input_layer'):
            net = common_layers.conv2d(
                net, 'input_conv', (3, 3), self.input_channels,
                filters_list[0], [1, 1], bias=False)
            net = tf.layers.batch_normalization(
                net, axis=-1, momentum=0.9, training=self.train,
                name='input_bn')
            net = self.prelu_layer(net, 'input_prelu')

        for block_no, layer_num in enumerate(layers_list, start=1):
            with tf.variable_scope('resblock-' + str(block_no)):
                net = self.resnet_block(
                    net, block_mode, layer_num,
                    filters_list[block_no - 1], filters_list[block_no],
                    strides_list[block_no - 1])
    return net
def tdnn_block(self, inputs):
    '''Apply the configured stack of TDNN layers.

    Reads 'tdnn_contexts', 'tdnn_dims', 'use_bn', 'use_dropout' (and
    'dropout_rate' when dropout is on) from self.netconf. 'tdnn_method' is
    optional and defaults to 'splice_layer'.

    Args:
      inputs: rank-4 tensor; the in-code comment gives the layout as NHWC.

    Returns:
      A pair `(x, downsample_input_len)`: the output of the last layer and
      self.input_len, which this block leaves unchanged.
    '''
    # 'splice_layer' runs faster and supports discrete context, for now.
    tdnn_method = (self.netconf['tdnn_method']
                   if 'tdnn_method' in self.netconf else 'splice_layer')
    tdnn_contexts = self.netconf['tdnn_contexts']
    logging.info("tdnn_contexts : {}".format(tdnn_contexts))
    tdnn_dims = self.netconf['tdnn_dims']
    logging.info("tdnn_dims : {}".format(tdnn_dims))

    layer_num = len(tdnn_contexts)
    assert layer_num == len(tdnn_dims)

    channels = [self.input_channels] + tdnn_dims
    logging.info("tdnn_channels : {}".format(channels))

    # The time axis is dynamic, the other two come from the static shape.
    input_h_t = tf.shape(inputs)[1]
    input_w = inputs.shape[2]
    input_c = inputs.shape[3]
    if tdnn_method == 'conv1d':
        # NHWC -> NW'C, W' = H * W
        inputs = tf.reshape(inputs, [-1, input_h_t * input_w, input_c])
        last_w = channels[0]
    else:
        inputs = tf.reshape(inputs, [-1, input_h_t, input_w * input_c])
        last_w = input_w * input_c

    # No layer here changes the sequence length, so it is passed through.
    downsample_input_len = self.input_len
    with tf.variable_scope('tdnn'):
        x = tf.identity(inputs)
        for index in range(layer_num):
            unit_name = 'unit-' + str(index + 1)
            with tf.variable_scope(unit_name):
                tdnn_name = 'tdnn-' + str(index + 1)
                x = common_layers.tdnn(
                    x,
                    tdnn_name,
                    last_w,
                    tdnn_contexts[index],
                    channels[index + 1],
                    has_bias=True,
                    method=tdnn_method)
                last_w = channels[index + 1]
                x = tf.nn.relu(x)
                if self.netconf['use_bn']:
                    bn_name = 'bn' + str(index + 1)
                    x = tf.layers.batch_normalization(
                        x, axis=-1, momentum=0.9, training=self.train,
                        name=bn_name)
                if self.netconf['use_dropout']:
                    x = tf.layers.dropout(
                        x, self.netconf['dropout_rate'], training=self.train)
    return x, downsample_input_len
def build_export_output(self, model):  # pylint: disable=no-self-use
    """Expose the model logits as the named export tensor "preds".

    Sets `model.preds` to an identity of `model.logits` and publishes it as
    the only entry of `model.output_dict`.
    """
    export_preds = tf.identity(model.logits, name="preds")
    model.preds = export_preds
    model.output_dict = dict(preds=export_preds)
def conv_block(self, inputs, depthwise=False):
    '''Apply the configured stack of 2D conv / relu / max-pool units.

    Reads 'filters', 'filter_size', 'filter_stride', 'pool_size', 'use_bn',
    'use_dropout' (and 'dropout_rate' when dropout is on) from self.netconf.

    Args:
      inputs: input tensor for the first convolution.
      depthwise: when True, use tf.layers.separable_conv2d instead of
        common_layers.conv2d.

    Returns:
      A pair `(x, downsample_input_len)`: the output of the last unit and
      self.input_len divided by the first pool size of every unit.
    '''
    conf = self.netconf
    filters = conf['filters']
    logging.info("filters : {}".format(filters))
    filters_size = conf['filter_size']
    logging.info("filters_size : {}".format(filters_size))
    filters_strides = conf['filter_stride']
    logging.info("filters_strides : {}".format(filters_strides))
    pools_size = conf['pool_size']
    logging.info("pools_size : {}".format(pools_size))

    layer_num = len(filters)
    assert layer_num == len(filters_size)
    assert layer_num == len(filters_strides)
    assert layer_num == len(pools_size)

    channels = [self.input_channels] + filters
    logging.info("channels : {}".format(channels))

    downsample_input_len = self.input_len
    with tf.variable_scope('cnn'):
        net = tf.identity(inputs)
        for index, filt in enumerate(filters):
            layer_no = str(index + 1)
            unit_name = 'unit-' + layer_no
            with tf.variable_scope(unit_name):
                if not depthwise:
                    net = common_layers.conv2d(
                        net, 'cnn-' + layer_no, filters_size[index],
                        channels[index], channels[index + 1],
                        filters_strides[index])
                else:
                    net = tf.layers.separable_conv2d(
                        net,
                        filters=filt,
                        kernel_size=filters_size[index],
                        strides=filters_strides[index],
                        padding='same',
                        name=unit_name)
                net = tf.nn.relu(net)
                if self.netconf['use_bn']:
                    net = tf.layers.batch_normalization(
                        net, axis=-1, momentum=0.9, training=self.train,
                        name='bn' + layer_no)
                if self.netconf['use_dropout']:
                    net = tf.layers.dropout(
                        net, self.netconf['dropout_rate'],
                        training=self.train)
                pool = pools_size[index]
                net = common_layers.max_pool(net, pool, pool)
                # Pooling shortens the sequence by its first-axis size.
                downsample_input_len = downsample_input_len / pool[0]
    return net, downsample_input_len
def call(self, inputs, training=None, mask=None):  # pylint: disable=too-many-locals
    """Score documents with the sentence-level then document-level encoder.

    Args:
      inputs: mapping with key "input_x", plus "input_dense" when
        self.use_dense_task is set.
      training: forwarded to the dropout and encoder layers.
      mask: unused here; sentence and document masks are built internally.

    Returns:
      The output of self.final_dense, annotated [batch_size, class_num].
    """
    input_x = tf.identity(inputs["input_x"], name='input_x')
    # NOTE(review): dense_input is bound under use_dense_task but consumed
    # under use_dense_input below -- confirm the two flags always agree.
    if self.use_dense_task:
        dense_input = inputs["input_dense"]

    # [batch_size, max_doc_len, max_sen_len]
    if self.use_true_length:
        input_hx = self.pad_to_hier_input_true_len(
            input_x,
            self.max_doc_len,
            self.max_sen_len,
            self.split_token,
            padding_token=self.padding_token)
    else:
        input_hx = self.pad_to_hier_input(
            input_x,
            self.max_doc_len,
            self.max_sen_len,
            padding_token=self.padding_token)

    # [batch_size, max_doc_len]
    sen_lens = compute_sen_lens(input_hx, padding_token=self.padding_token)
    # [batch_size]
    doc_lens = compute_doc_lens(sen_lens)

    # [batch_size, max_doc_len, max_sen_len, 1]
    sen_seq_mask = tf.sequence_mask(sen_lens, self.max_sen_len, dtype=tf.float32)
    sen_mask = tf.expand_dims(sen_seq_mask, axis=-1)
    # [batch_size, max_doc_len, 1]
    doc_seq_mask = tf.sequence_mask(doc_lens, self.max_doc_len, dtype=tf.float32)
    doc_mask = tf.expand_dims(doc_seq_mask, axis=-1)

    # [batch_size, max_doc_len, max_sen_len, embed_len]
    out = self.embed(input_hx)
    if self.use_pretrained_model:
        pretrained_shape = [
            -1, self.max_doc_len, self.max_sen_len, self.pretrained_model_dim
        ]
        input_px = tf.reshape(self.get_pre_train_graph(input_x),
                              pretrained_shape)
        out = tf.concat([out, input_px], axis=-1)
    out = self.embed_d(out, training=training)

    # [batch_size, max_doc_len, features]
    all_sen_encoder = tf.keras.layers.TimeDistributed(self.sen_encoder)
    out = all_sen_encoder(out, training=training, mask=sen_mask)
    # [batch_size, features]
    out = self.doc_encoder(out, training=training, mask=doc_mask)

    if self.use_dense_input:
        dense_out = self.dense_input_linear(dense_input)
        if not self.only_dense_input:
            out = tf.keras.layers.Concatenate()([out, dense_out])
        else:
            out = dense_out

    # [batch_size, class_num]
    return self.final_dense(out)
def call(self, features, **kwargs):
    """Run preprocessing and the model on the feature and text inputs.

    Args:
      features: mapping with keys 'inputs' and 'texts'.
      **kwargs: must contain 'training'; it is stored on self.train.

    Returns:
      Whatever self.model returns for the preprocessed inputs.
    """
    self.train = kwargs['training']
    audio_feats = tf.identity(features['inputs'], name='feats')
    text_inputs = features['texts']

    # AUTO_REUSE lets repeated calls share the variables under 'model'.
    with tf.variable_scope('model', reuse=tf.AUTO_REUSE):
        audio_feats, text_inputs = self.preprocess(audio_feats, text_inputs)
        logits = self.model(audio_feats, text_inputs)
    return logits
def build_export_output(self, model):  # pylint: disable=no-self-use
    """Attach the CRF-decoded export tensors to `model`.

    Decodes `model.logits` with `model.transitions` and `model.input_x_len`,
    stores the predictions in `model.preds`, the score (under the tensor
    name "score") in `model.score`, and both in `model.output_dict`.
    """
    decoded_preds, decoded_score = crf_decode(model.logits, model.transitions,
                                              model.input_x_len)
    model.preds = decoded_preds
    model.score = tf.identity(decoded_score, name="score")
    model.output_dict = dict(score=model.score, preds=model.preds)
def load_wav(wavpath, sr=8000):
    '''Read a wav file through the TF graph and return its first channel.

    The graph is built only when 'load_wav' is not yet a key of
    `_global_sess`; the session is obtained through `_get_session`.

    Args:
      wavpath: path of the wav file, fed to the "wavpath" placeholder.
      sr: expected sample rate in Hz.

    Returns:
      A pair `(sample_rate, audio)`, where audio is channel 0 of the
      decoded samples. The original docstring describes the samples as
      np.float32 in [-1, 1], same as tf.DecodeWav.

    Raises:
      AssertionError: if the decoded sample rate differs from `sr`.
    '''
    feat_name = 'load_wav'

    graph = None
    # Only build the graph the first time this feature is requested.
    if feat_name not in _global_sess:
        graph = tf.Graph()
        with graph.as_default():
            params = speech_ops.speech_params(sr=sr, audio_desired_samples=-1)
            path_ph = tf.placeholder(dtype=tf.string, name="wavpath")
            wav_tensor, rate_tensor = speech_ops.read_wav(path_ph, params)
            # Identity ops give the outputs stable, fetchable names.
            tf.identity(wav_tensor, name="audio")
            tf.identity(rate_tensor, name="sample_rate")

    sess = _get_session(feat_name, graph)
    fetches = [
        _get_out_tensor_name('audio', 0),
        _get_out_tensor_name('sample_rate', 0)
    ]
    audio, sample_rate = sess.run(fetches, feed_dict={"wavpath:0": wavpath})
    # Keep the first channel only.
    audio = audio[:, 0]

    assert sample_rate == sr, 'sampling rate must be {}Hz, get {}Hz'.format(
        sr, sample_rate)
    return sample_rate, audio
def call(self, inputs, training=None, mask=None):
    """Compute intent logits and per-token slot logits for a batch.

    Args:
      inputs: mapping with key "input_x", annotated [batch_size, max_len].
      training: forwarded to every dropout layer so that dropout follows
        the caller's mode.
      mask: ignored; the mask is recomputed from the input lengths.

    Returns:
      A pair `(intent_logits, slots_logits)`; the tensors are named
      "intent_logits" and "slots_logits".
    """
    input_x = inputs["input_x"]
    # [batch_size, max_len]
    input_x_lens = compute_sen_lens(input_x, padding_token=self.padding_token)
    # [batch_size, max_len, 1]
    mask = tf.expand_dims(
        tf.sequence_mask(input_x_lens, self.max_len, dtype=tf.float32),
        axis=-1)

    # [batch_size, max_len, embed_len]
    out = self.embed(input_x)
    out = self.embed_dropout(out, training=training)
    out = self.bi_rnn(out)

    # Intent branch: attention over the sequence, then a dense layer.
    intent_out = self.attention(out, mask=mask)
    intent_out = self.dropout(intent_out, training=training)
    intent_out = self.intent_dense(intent_out)
    intent_out = tf.identity(intent_out, name="intent_logits")

    # Slots branch: dense layer applied to the recurrent outputs.
    slots_out = self.dropout(out, training=training)
    slots_out = self.slots_dense(slots_out)
    slots_out = tf.identity(slots_out, name="slots_logits")
    return intent_out, slots_out
def update_hyper_param(self):
    """Build the op that refreshes the momentum and learning-rate variables.

    When `self._do_tune` is true the new values come from
    `get_mu_tensor` / `get_lr_tensor`, otherwise from the current variables.
    Unless `self._use_unsmoothed_lr_mu` is set, the new values are blended
    with the stored ones using `self._beta` before being assigned.

    Returns:
      A grouped op that assigns `self._mu_var` and `self._lr_var`.
    """
    self._mu = tf.identity(
        tf.cond(self._do_tune,
                true_fn=self.get_mu_tensor,
                false_fn=lambda: self._mu_var))
    # The learning rate is computed after the momentum.
    with tf.control_dependencies([self._mu]):
        self._lr = tf.identity(
            tf.cond(self._do_tune,
                    true_fn=self.get_lr_tensor,
                    false_fn=lambda: self._lr_var))

    assign_ops = []
    with tf.control_dependencies([self._mu, self._lr]):
        if not self._use_unsmoothed_lr_mu:
            # Weighted blend of the stored value and the new estimate.
            self._mu = self._beta * self._mu_var + (1 - self._beta) * self._mu
            self._lr = self._beta * self._lr_var + (1 - self._beta) * self._lr
            with tf.control_dependencies([self._mu, self._lr]):
                assign_ops.append(tf.assign(self._mu_var, self._mu))
                assign_ops.append(tf.assign(self._lr_var, self._lr))
        else:
            assign_ops.append(tf.assign(self._mu_var, self._mu))
            assign_ops.append(tf.assign(self._lr_var, self._lr))
    return tf.group(*assign_ops)
def call(self, features, **kwargs):
    '''Run preprocessing and the model; implementation of __call__().

    Args:
      features: mapping with key 'inputs' and, optionally, 'labels'.
      **kwargs: must contain 'training'; it is stored on self.train.

    Returns:
      Whatever self.model returns for the preprocessed features.
    '''
    self.train = kwargs['training']
    feats = tf.identity(features['inputs'], name='feats')
    logging.info(features)

    # 'labels' is absent in serving export mode.
    labels = features['labels'] if 'labels' in features else None

    with tf.variable_scope('model', reuse=tf.AUTO_REUSE):
        preprocessed = self.preprocess(feats)
        logits = self.model(preprocessed, labels)
    return logits
def build_output(self, model):  # pylint: disable=no-self-use
    """Attach CRF-decoded outputs and the ground truth to `model`.

    Sets `model.preds`, `model.score` (tensor name "score") and
    `model.y_ground_truth` (taken from `model.input_y`). When
    `model.use_pretrained_model` is set, the pretrained variables are
    initialised from `model.pretrained_model_path`.
    """
    decoded_preds, decoded_score = crf_decode(model.logits, model.transitions,
                                              model.input_x_len)
    model.preds = decoded_preds
    model.score = tf.identity(decoded_score, name="score")
    model.y_ground_truth = model.input_y

    if not model.use_pretrained_model:
        return
    logging.info("initialize_pretrained_model_variables")
    self.initialize_pretrained_model_variables(model.pretrained_model_path,
                                               model.pretrained_model_mode)
def add_delta_delta(feat, feat_size, order=2):
    '''Run `speech_ops.delta_delta` on a feature array through a TF session.

    The graph is built only when 'delta_delta' is not yet a key of
    `_global_sess`; the session is obtained through `_get_session`.

    Args:
      feat: array fed to a float32 placeholder of shape
        [None, feat_size, 1].
      feat_size: size of the second axis of `feat`.
      order: passed to `speech_ops.delta_delta`.

    Returns:
      The evaluated output of the 'delta_delta' tensor.
    '''
    feat_name = 'delta_delta'

    graph = None
    # Only build the graph the first time this feature is requested.
    if feat_name not in _global_sess:
        graph = tf.Graph()
        #pylint: disable=not-context-manager
        with graph.as_default():
            fbank_ph = tf.placeholder(
                dtype=tf.float32, shape=[None, feat_size, 1], name='fbank')
            with_deltas = speech_ops.delta_delta(fbank_ph, order=order)
            # The identity op gives the output a stable, fetchable name.
            tf.identity(with_deltas, name=feat_name)

    sess = _get_session(feat_name, graph)
    output_name = _get_out_tensor_name(feat_name, 0)
    return sess.run(output_name, feed_dict={'fbank:0': feat})
def call(self, inputs, training=None, mask=None):
    """Score inputs with parallel conv/pool branches over the embeddings.

    Args:
      inputs: mapping with key "input_x", plus "input_dense" when
        self.use_dense_task is set.
      training: forwarded to the dropout layer.
      mask: unused.

    Returns:
      The output of self.final_dense.
    """
    input_x = tf.identity(inputs["input_x"], name="input_x")
    # NOTE(review): dense_input is bound under use_dense_task but consumed
    # under use_dense_input below -- confirm the two flags always agree.
    if self.use_dense_task:
        dense_input = inputs["input_dense"]

    # A trailing axis is added so the 2D convolutions can be applied.
    embed_expand = tf.expand_dims(self.embed(input_x), axis=-1)

    conv_outs = []
    for conv2d in self.conv2ds:
        conv_outs.append(conv2d(embed_expand))
    pool_outs = []
    for conv_out, pool in zip(conv_outs, self.pools):
        pool_outs.append(pool(conv_out))

    out = tf.keras.layers.Concatenate(axis=1)(pool_outs)
    out = self.flat(out)
    out = self.dropout(out, training=training)
    out = self.dense(out)

    if self.use_dense_input:
        dense_out = self.dense_input_linear(dense_input)
        if not self.only_dense_input:
            out = tf.keras.layers.Concatenate()([out, dense_out])
        else:
            out = dense_out

    return self.final_dense(out)
def dist_to_opt(self):
    """Build the ops that track the smoothed distance-to-optimum estimate.

    Sets `self._grad_norm`, `self._grad_norm_avg`, `self._dist_to_opt` and
    `self._dist_to_opt_avg`.

    Returns:
      A list with the two moving-average update ops, in creation order.
    """
    # Running average of the gradient norm.
    self._grad_norm = tf.sqrt(self._grad_norm_squared)
    grad_norm_avg_op = self._moving_averager.apply([self._grad_norm])
    with tf.control_dependencies([grad_norm_avg_op]):
        self._grad_norm_avg = self._moving_averager.average(self._grad_norm)
        # Single-iteration distance estimation.
        # Note that self._grad_norm_avg is per variable.
        self._dist_to_opt = (
            self._grad_norm_avg / (self._grad_norm_squared_avg + EPS))

    # Running average of the distance.
    dist_avg_op = self._moving_averager.apply([self._dist_to_opt])
    with tf.control_dependencies([dist_avg_op]):
        self._dist_to_opt_avg = tf.identity(
            self._moving_averager.average(self._dist_to_opt))
        if self._sparsity_debias:
            self._dist_to_opt_avg /= (tf.sqrt(self._sparsity_avg) + EPS)
    return [grad_norm_avg_op, dist_avg_op]