Example #1
0
    def pooling_layer(self, x, pooling_type=None):
        '''
        Collapse axis 1 of a rank-3 tensor into utterance-level statistics.

        The input is asserted to be rank 3 (laid out as [B, T, D], reduced
        along T) and the result is asserted to be rank 2.

        Args:
          x: rank-3 tensor to pool.
          pooling_type: 'stats' or 'average'; any falsy value selects
            self.netconf['frame_pooling_type'] instead.

        Returns:
          'stats': mean and sqrt(variance + 1e-6) over axis 1, concatenated
            along axis 1 -> [B, D * 2].
          'average': mean over axis 1 -> [B, D].

        Raises:
          ValueError: the resolved pooling type is not one of the above.
        '''
        rank3_check = tf.debugging.assert_rank(x, 3)
        with tf.control_dependencies([rank3_check]):
            x = tf.identity(x)

        # An explicit argument wins; otherwise use the configured default.
        pooling_type = pooling_type or self.netconf['frame_pooling_type']

        if pooling_type == 'stats':
            with tf.name_scope('stats_pooling'):
                avg, variance = tf.nn.moments(x, 1)
                # The small constant keeps sqrt away from an exact zero.
                stddev = tf.sqrt(variance + 1e-6)
                x = tf.concat([avg, stddev], 1)
        elif pooling_type == 'average':
            with tf.name_scope('average_pooling'):
                x = tf.nn.moments(x, 1)[0]
        else:
            raise ValueError(
                'Unsupported frame_pooling_type: %s' % pooling_type)

        rank2_check = tf.debugging.assert_rank(x, 2)
        with tf.control_dependencies([rank2_check]):
            return tf.identity(x)
Example #2
0
  def build_export_output(self, model):  # pylint: disable=no-self-use
    """
    Attach the export-time intent and slot outputs to `model`.

    Reads `model.transitions`, `model.logits` (an (intent, slots) pair) and
    `model.input_x_len`; writes `model.preds`, `model.score` and
    `model.output_dict`. The produced tensors are named "intent_score",
    "intent_preds", "slots_preds" and "slots_score".
    """
    crf_transitions = model.transitions
    intent_logits, slot_logits = model.logits

    intent_probs = tf.nn.softmax(intent_logits, name="intent_score")
    intent_labels = tf.argmax(intent_logits, axis=-1, name="intent_preds")

    decoded = crf_decode(slot_logits, crf_transitions, model.input_x_len)
    slot_labels, slot_scores = decoded

    # Identity ops only give the decoded tensors stable export names.
    slot_labels = tf.identity(slot_labels, name="slots_preds")
    slot_scores = tf.identity(slot_scores, name="slots_score")

    model.preds = (intent_labels, slot_labels)
    model.score = (intent_probs, slot_scores)
    model.output_dict = dict(
        slots_score=slot_scores,
        slots_preds=slot_labels,
        intent_score=intent_probs,
        intent_preds=intent_labels)
    logging.info("Model built.")
Example #3
0
def transfer_bert_model(bert_model_dir, output_bert_model):
  """Re-save a BERT checkpoint with named identity output tensors.

  Builds a `modeling.BertModel` inference graph from
  `<bert_model_dir>/bert_config.json`, tags every encoder layer as
  `encoder_layers_<idx>` and the pooled output as `input_x_bert_cls`,
  restores `<bert_model_dir>/bert_model.ckpt` and saves the result.

  Args:
    bert_model_dir: directory holding `bert_config.json` and the
      `bert_model.ckpt` checkpoint.
    output_bert_model: path passed to `Saver.save` for the new checkpoint.
  """
  graph = tf.Graph()
  with graph.as_default():
    # Graph construction does not need a session; one is opened only for
    # the restore/save step below.
    input_ids = tf.placeholder(tf.int32, (None, None), 'input_ids')
    input_mask = tf.placeholder(tf.int32, (None, None), 'input_mask')
    segment_ids = tf.placeholder(tf.int32, (None, None), 'segment_ids')

    bert_config = modeling.BertConfig.from_json_file(
        os.path.join(bert_model_dir, 'bert_config.json'))
    model = modeling.BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=False)
    all_encoder_layers = model.get_all_encoder_layers()
    pooled_output = model.get_pooled_output()

    # Give each output a stable name so it can be fetched by name later.
    for idx, layer in enumerate(all_encoder_layers):
      named_layer = tf.identity(layer, "encoder_layers_" + str(idx))
      print("layer:", named_layer)
    input_x_bert_cls = tf.identity(pooled_output, "input_x_bert_cls")
    print("input_x_bert_cls", input_x_bert_cls)
    saver = tf.train.Saver()

    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      saver.restore(sess, os.path.join(bert_model_dir, "bert_model.ckpt"))
      saver.save(sess, output_bert_model)
Example #4
0
    def curvature_range(self):
        """Build the ops that track the smoothed min/max of the curvature window.

        Writes `self._grad_norm_squared + EPS` into a circular buffer of
        `self._curv_win_width` entries, takes the min and max over the
        entries written so far, and feeds both through
        `self._moving_averager`. The smoothed values are stored on
        `self._h_min` / `self._h_max`; the raw per-step values on
        `self._h_min_t` / `self._h_max_t`.

        Returns:
          A one-element list holding the moving-average update op.
        """
        # set up the curvature window: a non-trainable float32 buffer of zeros
        self._curv_win = tf.Variable(np.zeros([
            self._curv_win_width,
        ]),
                                     dtype=tf.float32,
                                     name="curv_win",
                                     trainable=False)
        # we can use log smoothing for curvature range to follow trend faster
        # self._curv_win = tf.scatter_update(
        #   self._curv_win, self._global_step % self._curv_win_width,
        #   tf.log(self._grad_norm_squared + EPS))
        # Overwrite the slot for the current step (index wraps around).
        self._curv_win = tf.scatter_update(
            self._curv_win, self._global_step % self._curv_win_width,
            self._grad_norm_squared + EPS)
        # note here the iterations start from iteration 0, so only the first
        # min(window width, global_step + 1) slots have been written.
        valid_window = tf.slice(
            self._curv_win, tf.constant([
                0,
            ]),
            tf.expand_dims(tf.minimum(tf.constant(self._curv_win_width),
                                      self._global_step + 1),
                           dim=0))

        # Per-step extrema, taken in the log domain when log smoothing is on.
        if self._h_min_log_smooth:
            self._h_min_t = tf.log(tf.reduce_min(valid_window) + EPS)
        else:
            self._h_min_t = tf.reduce_min(valid_window)
        if self._h_max_log_smooth:
            self._h_max_t = tf.log(tf.reduce_max(valid_window) + EPS)
        else:
            self._h_max_t = tf.reduce_max(valid_window)

        curv_range_ops = []
        with tf.control_dependencies([self._h_min_t, self._h_max_t]):
            avg_op = self._moving_averager.apply(
                [self._h_min_t, self._h_max_t])
            # Read the averages only after the update op has run; values
            # smoothed in the log domain are mapped back with exp.
            with tf.control_dependencies([avg_op]):
                if self._h_min_log_smooth:
                    self._h_min = tf.exp(
                        tf.identity(
                            self._moving_averager.average(self._h_min_t)))
                else:
                    self._h_min = \
                      tf.identity(self._moving_averager.average(self._h_min_t))
                if self._h_max_log_smooth:
                    self._h_max = tf.exp(
                        tf.identity(
                            self._moving_averager.average(self._h_max_t)))
                else:
                    self._h_max = \
                      tf.identity(self._moving_averager.average(self._h_max_t))
            # Note: this scaling sits outside the avg_op dependency scope.
            if self._sparsity_debias:
                self._h_min = self._h_min * self._sparsity_avg
                self._h_max = self._h_max * self._sparsity_avg
        curv_range_ops.append(avg_op)
        return curv_range_ops
Example #5
0
def _freq_feat_graph(feat_name, **kwargs):
  """Build the extraction graph for a frequency feature unless one is cached.

  Args:
    feat_name: 'fbank' or 'spec'.
    **kwargs: 'winlen', 'winstep', 'feature_size' and 'sr' are forwarded to
      `speech_ops.speech_params`; 'nfft' is accepted but not used.

  Returns:
    (graph, (input_name, output_name)). `graph` is None when `feat_name` is
    already a key of `_global_sess`; the names come from
    `_get_out_tensor_name` for 'wavpath' and `feat_name`.

  Raises:
    AssertionError / ValueError: `feat_name` is not a supported feature.
  """
  frame_length = kwargs.get('winlen')
  frame_step = kwargs.get('winstep')
  num_bins = kwargs.get('feature_size')
  sample_rate_hz = kwargs.get('sr')

  supported = ('fbank', 'spec')
  assert feat_name in supported

  params = speech_ops.speech_params(
      sr=sample_rate_hz,
      bins=num_bins,
      add_delta_deltas=False,
      audio_frame_length=frame_length,
      audio_frame_step=frame_step)

  if feat_name not in supported:
    # Only reachable when assertions are disabled.
    raise ValueError(f"Not support freq feat: {feat_name}.")

  graph = None
  if feat_name not in _global_sess:
    graph = tf.Graph()
    #pylint: disable=not-context-manager
    with graph.as_default():
      filepath = tf.placeholder(dtype=tf.string, shape=[], name='wavpath')
      waveforms, sample_rate = speech_ops.read_wav(filepath, params)
      if feat_name == 'fbank':
        feat = speech_ops.extract_feature(waveforms, params)
      else:
        # output_type: 1, power spec; 2 log power spec
        power_spec = py_x_ops.spectrum(
            waveforms[:, 0],
            tf.cast(sample_rate, tf.dtypes.float32),
            output_type=1)
        # Magnitude spectrum with a trailing axis; shape must be [T, D, C].
        feat = tf.expand_dims(tf.sqrt(power_spec), -1)
      # The identity op exposes the feature under a predictable name.
      tf.identity(feat, name=feat_name)

  input_name = _get_out_tensor_name('wavpath', 0)
  output_name = _get_out_tensor_name(feat_name, 0)
  return graph, (input_name, output_name)
Example #6
0
    def resnet(self, inputs):
        ''' Input conv/BN/PReLU stem followed by the configured residual blocks.

        Reads 'layers_list', 'filters_list', 'strides_list' and 'block_mode'
        from self.netconf. Block i (0-based) is built by self.resnet_block
        with filters_list[i], filters_list[i + 1] and strides_list[i].

        Returns the output of the last residual block.
        '''
        netconf = self.netconf
        layer_cfg = netconf['layers_list']
        logging.info("layers_list : {}".format(layer_cfg))
        filter_cfg = netconf['filters_list']
        logging.info("filters_list : {}".format(filter_cfg))
        stride_cfg = netconf['strides_list']
        logging.info("strides_list : {}".format(stride_cfg))
        block_mode = netconf['block_mode']
        logging.info("block_mode : {}".format(block_mode))

        with tf.variable_scope('resnet'):
            net = tf.identity(inputs)

            with tf.variable_scope('input_layer'):
                net = common_layers.conv2d(
                    net, 'input_conv', (3, 3), self.input_channels,
                    filter_cfg[0], [1, 1], bias=False)
                net = tf.layers.batch_normalization(
                    net,
                    axis=-1,
                    momentum=0.9,
                    training=self.train,
                    name='input_bn')
                net = self.prelu_layer(net, 'input_prelu')

            # Scope names are 1-based: resblock-1, resblock-2, ...
            for block_no, layer_num in enumerate(layer_cfg, start=1):
                with tf.variable_scope('resblock-' + str(block_no)):
                    net = self.resnet_block(
                        net, block_mode, layer_num,
                        filter_cfg[block_no - 1],
                        filter_cfg[block_no],
                        stride_cfg[block_no - 1])

        return net
Example #7
0
    def tdnn_block(self, inputs):
        ''' Run the configured stack of TDNN layers over `inputs`.

        Reads from self.netconf: 'tdnn_contexts', 'tdnn_dims', 'use_bn',
        'use_dropout', 'dropout_rate' (only when dropout is enabled) and,
        optionally, 'tdnn_method' (defaults to 'splice_layer').

        Args:
          inputs: tensor indexed here as rank 4 (NHWC). It is flattened to
            rank 3 before the first layer; the layout depends on the method.

        Returns:
          (x, downsample_input_len): the output of the last layer and
          self.input_len, returned unchanged because nothing in this block
          alters the length.
        '''
        if 'tdnn_method' in self.netconf:
            tdnn_method = self.netconf['tdnn_method']
        else:
            # Runs faster, support discrete context, for now.
            tdnn_method = 'splice_layer'
        tdnn_contexts = self.netconf['tdnn_contexts']
        logging.info("tdnn_contexts : {}".format(tdnn_contexts))
        tdnn_dims = self.netconf['tdnn_dims']
        logging.info("tdnn_dims : {}".format(tdnn_dims))

        layer_num = len(tdnn_contexts)
        assert layer_num == len(tdnn_dims)

        # channels[i] is the input width of layer i, channels[i + 1] its output.
        channels = [self.input_channels] + tdnn_dims
        logging.info("tdnn_channels : {}".format(channels))

        input_h_t = tf.shape(inputs)[1]
        input_w = inputs.shape[2]
        input_c = inputs.shape[3]
        if tdnn_method == 'conv1d':
            # NHWC -> NW'C, W' = H * W
            inputs = tf.reshape(inputs, [-1, input_h_t * input_w, input_c])
            last_w = channels[0]
        else:
            # NHWC -> N, H, W * C
            inputs = tf.reshape(inputs, [-1, input_h_t, input_w * input_c])
            last_w = input_w * input_c

        downsample_input_len = self.input_len
        with tf.variable_scope('tdnn'):
            x = tf.identity(inputs)
            for index in range(layer_num):
                unit_name = 'unit-' + str(index + 1)
                with tf.variable_scope(unit_name):
                    tdnn_name = 'tdnn-' + str(index + 1)
                    x = common_layers.tdnn(x,
                                           tdnn_name,
                                           last_w,
                                           tdnn_contexts[index],
                                           channels[index + 1],
                                           has_bias=True,
                                           method=tdnn_method)
                    last_w = channels[index + 1]
                    x = tf.nn.relu(x)
                    if self.netconf['use_bn']:
                        bn_name = 'bn' + str(index + 1)
                        x = tf.layers.batch_normalization(x,
                                                          axis=-1,
                                                          momentum=0.9,
                                                          training=self.train,
                                                          name=bn_name)
                    if self.netconf['use_dropout']:
                        x = tf.layers.dropout(x,
                                              self.netconf['dropout_rate'],
                                              training=self.train)

        return x, downsample_input_len
Example #8
0
 def build_export_output(self, model):  # pylint: disable=no-self-use
   """
   Expose the model logits for export.

   Wraps `model.logits` in an identity op named "preds", stores it on
   `model.preds` and publishes it through `model.output_dict`.
   """
   named_logits = tf.identity(model.logits, name="preds")
   model.preds = named_logits
   model.output_dict = dict(preds=model.preds)
Example #9
0
    def conv_block(self, inputs, depthwise=False):
        ''' Stack of conv -> relu -> [bn] -> [dropout] -> max-pool units.

        Reads 'filters', 'filter_size', 'filter_stride', 'pool_size',
        'use_bn', 'use_dropout' and (when dropout is on) 'dropout_rate' from
        self.netconf. The four per-layer lists must have equal length.

        Args:
          inputs: input tensor of the first convolution.
          depthwise: use tf.layers.separable_conv2d instead of
            common_layers.conv2d.

        Returns:
          (x, downsample_input_len): the output of the last unit and
          self.input_len divided by the first pool size of every unit.
        '''
        netconf = self.netconf
        filters = netconf['filters']
        logging.info("filters : {}".format(filters))
        kernel_sizes = netconf['filter_size']
        logging.info("filters_size : {}".format(kernel_sizes))
        conv_strides = netconf['filter_stride']
        logging.info("filters_strides : {}".format(conv_strides))
        pool_sizes = netconf['pool_size']
        logging.info("pools_size : {}".format(pool_sizes))

        layer_num = len(filters)
        assert layer_num == len(kernel_sizes)
        assert layer_num == len(conv_strides)
        assert layer_num == len(pool_sizes)

        # channels[i] -> channels[i + 1] are the in/out widths of unit i.
        channels = [self.input_channels] + filters
        logging.info("channels : {}".format(channels))

        downsample_input_len = self.input_len
        with tf.variable_scope('cnn'):
            x = tf.identity(inputs)
            for unit_no, out_filters in enumerate(filters, start=1):
                idx = unit_no - 1
                unit_name = 'unit-' + str(unit_no)
                with tf.variable_scope(unit_name):
                    if not depthwise:
                        x = common_layers.conv2d(
                            x, 'cnn-' + str(unit_no), kernel_sizes[idx],
                            channels[idx], channels[idx + 1],
                            conv_strides[idx])
                    else:
                        x = tf.layers.separable_conv2d(
                            x,
                            filters=out_filters,
                            kernel_size=kernel_sizes[idx],
                            strides=conv_strides[idx],
                            padding='same',
                            name=unit_name)
                    x = tf.nn.relu(x)
                    if self.netconf['use_bn']:
                        x = tf.layers.batch_normalization(
                            x,
                            axis=-1,
                            momentum=0.9,
                            training=self.train,
                            name='bn' + str(unit_no))
                    if self.netconf['use_dropout']:
                        x = tf.layers.dropout(
                            x,
                            self.netconf['dropout_rate'],
                            training=self.train)
                    pool = pool_sizes[idx]
                    x = common_layers.max_pool(x, pool, pool)
                    # Only the pooling step is tracked in the length.
                    downsample_input_len = downsample_input_len / pool[0]

        return x, downsample_input_len
Example #10
0
  def call(self, inputs, training=None, mask=None):  # pylint: disable=too-many-locals
    """Score documents with the sentence-level then document-level encoders.

    Args:
      inputs: mapping with "input_x" and, when `self.use_dense_task` is
        set, "input_dense".
      training: forwarded to the dropout and encoder layers.
      mask: unused; masks are derived from the padded input.

    Returns:
      Output of `self.final_dense`, [batch_size, class_num].
    """
    tokens = tf.identity(inputs["input_x"], name='input_x')
    # NOTE(review): `dense_input` is bound under `use_dense_task` but read
    # under `use_dense_input` below - confirm the two flags always agree.
    if self.use_dense_task:
      dense_input = inputs["input_dense"]

    # Both variants yield [batch_size, max_doc_len, max_sen_len].
    if self.use_true_length:
      hier_tokens = self.pad_to_hier_input_true_len(
          tokens,
          self.max_doc_len,
          self.max_sen_len,
          self.split_token,
          padding_token=self.padding_token)
    else:
      hier_tokens = self.pad_to_hier_input(
          tokens,
          self.max_doc_len,
          self.max_sen_len,
          padding_token=self.padding_token)

    # [batch_size, max_doc_len]
    sen_lens = compute_sen_lens(hier_tokens, padding_token=self.padding_token)
    # [batch_size]
    doc_lens = compute_doc_lens(sen_lens)

    # [batch_size, max_doc_len, max_sen_len, 1]
    sen_mask = tf.sequence_mask(sen_lens, self.max_sen_len, dtype=tf.float32)
    sen_mask = tf.expand_dims(sen_mask, axis=-1)
    # [batch_size, max_doc_len, 1]
    doc_mask = tf.sequence_mask(doc_lens, self.max_doc_len, dtype=tf.float32)
    doc_mask = tf.expand_dims(doc_mask, axis=-1)

    # [batch_size, max_doc_len, max_sen_len, embed_len]
    out = self.embed(hier_tokens)
    if self.use_pretrained_model:
      pretrained_shape = [
          -1, self.max_doc_len, self.max_sen_len, self.pretrained_model_dim
      ]
      pretrained = tf.reshape(
          self.get_pre_train_graph(tokens), pretrained_shape)
      out = tf.concat([out, pretrained], axis=-1)
    out = self.embed_d(out, training=training)

    # [batch_size, max_doc_len, features]
    per_sentence = tf.keras.layers.TimeDistributed(self.sen_encoder)
    out = per_sentence(out, training=training, mask=sen_mask)
    # [batch_size, features]
    out = self.doc_encoder(out, training=training, mask=doc_mask)

    if self.use_dense_input:
      dense_out = self.dense_input_linear(dense_input)
      if self.only_dense_input:
        out = dense_out
      else:
        out = tf.keras.layers.Concatenate()([out, dense_out])

    # [batch_size, class_num]
    return self.final_dense(out)
Example #11
0
    def call(self, features, **kwargs):
        ''' Run preprocessing and the model on features['inputs'] and features['texts'].

        Stores kwargs['training'] on self.train, names the input tensor
        'feats', and builds everything under the reusable 'model' scope.
        Returns the logits produced by self.model.
        '''
        self.train = kwargs['training']
        raw_feats = tf.identity(features['inputs'], name='feats')
        raw_texts = features['texts']

        with tf.variable_scope('model', reuse=tf.AUTO_REUSE):
            feats, texts = self.preprocess(raw_feats, raw_texts)
            return self.model(feats, texts)
    def build_export_output(self, model):  # pylint: disable=no-self-use
        """
        Attach CRF-decoded export outputs to `model`.

        Calls `crf_decode` on `model.logits`, `model.transitions` and
        `model.input_x_len`; stores the first result on `model.preds`, the
        second (renamed "score") on `model.score`, and both in
        `model.output_dict`.
        """
        decoded = crf_decode(model.logits, model.transitions,
                             model.input_x_len)
        model.preds, raw_score = decoded

        model.score = tf.identity(raw_score, name="score")
        model.output_dict = dict(score=model.score, preds=model.preds)
Example #13
0
def load_wav(wavpath, sr=8000):
    '''
    Read a wav file through a TensorFlow graph and return its first channel.

    The graph is only built when 'load_wav' is not yet a key of
    `_global_sess`; the session comes from `_get_session`.

    Args:
      wavpath: value fed to the string placeholder "wavpath:0".
      sr: expected sample rate; also passed to `speech_ops.speech_params`.

    Returns:
      (sample_rate, audio), where audio is column 0 of the decoded samples
      (np.float32 in [-1, 1], same as tf.DecodeWav).

    Raises:
      AssertionError: the decoded sample rate differs from `sr`.
    '''
    session_key = 'load_wav'
    graph = None
    if session_key not in _global_sess:
        graph = tf.Graph()
        with graph.as_default():
            params = speech_ops.speech_params(sr=sr, audio_desired_samples=-1)
            path_in = tf.placeholder(dtype=tf.string, name="wavpath")
            decoded_audio, decoded_rate = speech_ops.read_wav(path_in, params)
            # Named identities let the tensors be fetched by name below.
            tf.identity(decoded_audio, name="audio")
            tf.identity(decoded_rate, name="sample_rate")

    sess = _get_session(session_key, graph)
    fetches = [
        _get_out_tensor_name('audio', 0),
        _get_out_tensor_name('sample_rate', 0),
    ]
    samples, sample_rate = sess.run(fetches, feed_dict={"wavpath:0": wavpath})
    # Keep only the first audio channel.
    audio = samples[:, 0]

    assert sample_rate == sr, 'sampling rate must be {}Hz, get {}Hz'.format(
        sr, sample_rate)
    return sample_rate, audio
 def call(self, inputs, training=None, mask=None):
     """Compute intent and slot logits for a batch of token ids.

     Args:
       inputs: mapping with key "input_x" ([batch_size, max_len]).
       training: forwarded to every dropout layer so that all of them
         follow the same explicit train/inference flag.
       mask: ignored; the mask is recomputed from the padding token.

     Returns:
       (intent_out, slots_out): tensors named "intent_logits" and
       "slots_logits".
     """
     # [batch_size, max_len]
     input_x = inputs["input_x"]
     # presumably [batch_size] - one length per sequence; confirm against
     # compute_sen_lens
     input_x_lens = compute_sen_lens(input_x,
                                     padding_token=self.padding_token)
     # [batch_size, max_len, 1]
     mask = tf.expand_dims(tf.sequence_mask(input_x_lens,
                                            self.max_len,
                                            dtype=tf.float32),
                           axis=-1)
     # [batch_size, max_len, embed_len]
     out = self.embed(input_x)
     out = self.embed_dropout(out, training=training)
     out = self.bi_rnn(out)

     # Intent head: attention over the sequence, then a dense layer.
     intent_out = self.attention(out, mask=mask)
     intent_out = self.dropout(intent_out, training=training)
     intent_out = self.intent_dense(intent_out)
     intent_out = tf.identity(intent_out, name="intent_logits")

     # Slot head: dense layer applied to the recurrent output.
     slots_out = self.dropout(out, training=training)
     slots_out = self.slots_dense(slots_out)
     slots_out = tf.identity(slots_out, name="slots_logits")
     return intent_out, slots_out
Example #15
0
    def update_hyper_param(self):
        """Build the op that refreshes the stored momentum and learning rate.

        `self._mu` / `self._lr` take the tuned values from
        `get_mu_tensor()` / `get_lr_tensor()` when `self._do_tune` is true
        and the current `_mu_var` / `_lr_var` otherwise. They are written
        back to the variables either directly or after blending with the
        previous value using `self._beta`.

        Returns:
          A grouped op performing both variable assignments.
        """
        assign_hyper_ops = []
        self._mu = tf.identity(
            tf.cond(self._do_tune, lambda: self.get_mu_tensor(),
                    lambda: self._mu_var))
        # The learning rate is evaluated only after the momentum.
        with tf.control_dependencies([self._mu]):
            self._lr = tf.identity(
                tf.cond(self._do_tune, lambda: self.get_lr_tensor(),
                        lambda: self._lr_var))

        with tf.control_dependencies([self._mu, self._lr]):
            if self._use_unsmoothed_lr_mu:
                # Store the freshly computed values as they are.
                assign_hyper_ops.append(tf.assign(self._mu_var, self._mu))
                assign_hyper_ops.append(tf.assign(self._lr_var, self._lr))
            else:
                # Blend: beta * previous value + (1 - beta) * new value.
                self._mu = self._beta * self._mu_var + (1 -
                                                        self._beta) * self._mu
                self._lr = self._beta * self._lr_var + (1 -
                                                        self._beta) * self._lr
                with tf.control_dependencies([self._mu, self._lr]):
                    assign_hyper_ops.append(tf.assign(self._mu_var, self._mu))
                    assign_hyper_ops.append(tf.assign(self._lr_var, self._lr))
        assign_hyper_op = tf.group(*assign_hyper_ops)
        return assign_hyper_op
Example #16
0
    def call(self, features, **kwargs):
        ''' Run preprocessing and the model on features['inputs'].

        Stores kwargs['training'] on self.train and logs `features`.
        features['labels'] is passed to the model when present; otherwise
        (serving export mode) None is passed. Returns the model's logits.
        '''
        self.train = kwargs['training']
        raw_feats = tf.identity(features['inputs'], name='feats')
        logging.info(features)

        # serving export mode carries no labels
        labels = features['labels'] if 'labels' in features else None

        with tf.variable_scope('model', reuse=tf.AUTO_REUSE):
            feats = self.preprocess(raw_feats)
            return self.model(feats, labels)
    def build_output(self, model):  # pylint: disable=no-self-use
        """
        Attach CRF-decoded predictions, score and ground truth to `model`.

        Sets `model.preds` and `model.score` (named "score") from
        `crf_decode`, copies `model.input_y` to `model.y_ground_truth`, and
        initializes the pretrained-model variables when
        `model.use_pretrained_model` is set.
        """
        decoded = crf_decode(model.logits, model.transitions,
                             model.input_x_len)
        model.preds, raw_score = decoded

        model.score = tf.identity(raw_score, name="score")
        model.y_ground_truth = model.input_y

        if not model.use_pretrained_model:
            return
        logging.info("initialize_pretrained_model_variables")
        self.initialize_pretrained_model_variables(
            model.pretrained_model_path, model.pretrained_model_mode)
Example #18
0
def add_delta_delta(feat, feat_size, order=2):
  ''' Run `speech_ops.delta_delta` on `feat` and return the result.

  The graph (a float32 placeholder of shape [None, feat_size, 1] named
  'fbank' feeding `speech_ops.delta_delta`) is only built when
  'delta_delta' is not yet a key of `_global_sess`.
  '''
  session_key = 'delta_delta'
  graph = None
  if session_key not in _global_sess:
    graph = tf.Graph()
    #pylint: disable=not-context-manager
    with graph.as_default():
      fbank_in = tf.placeholder(
          dtype=tf.float32, shape=[None, feat_size, 1], name='fbank')
      with_deltas = speech_ops.delta_delta(fbank_in, order=order)
      # Named so the output can be fetched by name from the session.
      tf.identity(with_deltas, name=session_key)

  sess = _get_session(session_key, graph)
  output_name = _get_out_tensor_name(session_key, 0)
  return sess.run(output_name, feed_dict={'fbank:0': feat})
Example #19
0
 def call(self, inputs, training=None, mask=None):
   """Score inputs with parallel conv/pool branches over the embeddings.

   Args:
     inputs: mapping with "input_x" and, when `self.use_dense_task` is
       set, "input_dense".
     training: forwarded to the dropout layer.
     mask: unused.

   Returns:
     Output of `self.final_dense`.
   """
   tokens = tf.identity(inputs["input_x"], name="input_x")
   # NOTE(review): `dense_input` is bound under `use_dense_task` but read
   # under `use_dense_input` below - confirm the two flags always agree.
   if self.use_dense_task:
     dense_input = inputs["input_dense"]

   # A trailing axis is added so the embeddings can feed 2-D convolutions.
   embedded = tf.expand_dims(self.embed(tokens), axis=-1)
   conv_feats = [conv(embedded) for conv in self.conv2ds]
   pooled = [pool(feat) for feat, pool in zip(conv_feats, self.pools)]

   out = tf.keras.layers.Concatenate(axis=1)(pooled)
   out = self.flat(out)
   out = self.dropout(out, training=training)
   out = self.dense(out)

   if self.use_dense_input:
     dense_out = self.dense_input_linear(dense_input)
     if self.only_dense_input:
       out = dense_out
     else:
       out = tf.keras.layers.Concatenate()([out, dense_out])

   return self.final_dense(out)
Example #20
0
 def dist_to_opt(self):
     """Build the ops that maintain the smoothed distance-to-optimum estimate.

     Uses `self._grad_norm_squared` and `self._grad_norm_squared_avg`,
     which are read here but not set in this method. Stores the results on
     `self._grad_norm`, `self._grad_norm_avg`, `self._dist_to_opt` and
     `self._dist_to_opt_avg`.

     Returns:
       A list with the two moving-average update ops, in creation order.
     """
     dist_to_opt_ops = []
     # running average of the norm of gradient
     self._grad_norm = tf.sqrt(self._grad_norm_squared)
     avg_op = self._moving_averager.apply([
         self._grad_norm,
     ])
     dist_to_opt_ops.append(avg_op)
     with tf.control_dependencies([avg_op]):
         self._grad_norm_avg = self._moving_averager.average(
             self._grad_norm)
         # single iteration distance estimation
         # note that self._grad_norm_avg is per variable
         # EPS guards the division against a zero denominator.
         self._dist_to_opt = (self._grad_norm_avg /
                              (self._grad_norm_squared_avg + EPS))
     # running average of distance
     avg_op = self._moving_averager.apply([self._dist_to_opt])
     dist_to_opt_ops.append(avg_op)
     with tf.control_dependencies([avg_op]):
         self._dist_to_opt_avg = tf.identity(
             self._moving_averager.average(self._dist_to_opt))
         # Optional debiasing: divide by sqrt(sparsity average) + EPS.
         if self._sparsity_debias:
             self._dist_to_opt_avg /= (tf.sqrt(self._sparsity_avg) + EPS)
     return dist_to_opt_ops