Example #1
    def resnet(self, inputs):
        ''' ResNet blocks. '''
        layers_list = self.netconf['layers_list']
        logging.info("layers_list : {}".format(layers_list))
        filters_list = self.netconf['filters_list']
        logging.info("filters_list : {}".format(filters_list))
        strides_list = self.netconf['strides_list']
        logging.info("strides_list : {}".format(strides_list))
        block_mode = self.netconf['block_mode']
        logging.info("block_mode : {}".format(block_mode))

        with tf.variable_scope('resnet'):
            x = tf.identity(inputs)
            with tf.variable_scope('input_layer'):
                x = common_layers.conv2d(x,
                                         'input_conv', (3, 3),
                                         self.input_channels,
                                         filters_list[0], [1, 1],
                                         bias=False)
                x = tf.layers.batch_normalization(x,
                                                  axis=-1,
                                                  momentum=0.9,
                                                  training=self.train,
                                                  name='input_bn')
                x = self.prelu_layer(x, 'input_prelu')

            for index, layer_num in enumerate(layers_list):
                unit_name = 'resblock-' + str(index + 1)
                with tf.variable_scope(unit_name):
                    x = self.resnet_block(x, block_mode, layer_num,
                                          filters_list[index],
                                          filters_list[index + 1],
                                          strides_list[index])

        return x
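The resnet() method above is configured entirely through self.netconf. A hypothetical configuration fragment consistent with the keys it reads might look like the following (key names come from the snippet; the values are illustrative only, and filters_list needs one more entry than layers_list because filters_list[index + 1] is accessed):

netconf = {
    'layers_list': [2, 2, 2],            # residual units per stage
    'filters_list': [32, 32, 64, 128],   # len(layers_list) + 1 entries
    'strides_list': [1, 2, 2],           # stride of each stage
    'block_mode': 'basic',               # forwarded to self.resnet_block()
}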
Example #2
    def model(self, feats, labels):
        ''' Build the model. '''
        x = self.resnet(feats)

        with tf.variable_scope("avg_pooling"):
            batch_t = tf.shape(x)[0]
            time_t = tf.shape(x)[1]
            feat, channel = x.shape.as_list()[2:]
            x = tf.reshape(x, [batch_t, time_t, feat * channel])
            x = self.pooling_layer(x, pooling_type='average')

        with tf.variable_scope("output_layer"):
            shape = x.shape.as_list()
            shape = shape[-1]
            hidden_dims = self.params().embedding_size
            y = x
            y = common_layers.linear(y,
                                     'dense-matmul', [shape, hidden_dims],
                                     has_bias=True)
            y = tf.layers.batch_normalization(y,
                                              axis=-1,
                                              momentum=0.99,
                                              training=self.train,
                                              name='dense-bn')
            embedding = y
            dense_output = y

        logits = self.logits_layer(dense_output, labels)
        model_outputs = {'logits': logits, 'embeddings': embedding}
        return model_outputs
Example #3
    def tdnn_block(self, inputs):
        ''' TDNN layers. '''
        if 'tdnn_method' in self.netconf:
            tdnn_method = self.netconf['tdnn_method']
        else:
            # Runs faster and supports a discrete context list, for now.
            tdnn_method = 'splice_layer'
        tdnn_contexts = self.netconf['tdnn_contexts']
        logging.info("tdnn_contexts : {}".format(tdnn_contexts))
        tdnn_dims = self.netconf['tdnn_dims']
        logging.info("tdnn_dims : {}".format(tdnn_dims))

        layer_num = len(tdnn_contexts)
        assert layer_num == len(tdnn_dims)

        channels = [self.input_channels] + tdnn_dims
        logging.info("tdnn_channels : {}".format(channels))

        input_h_t = tf.shape(inputs)[1]
        input_w = inputs.shape[2]
        input_c = inputs.shape[3]
        if tdnn_method == 'conv1d':
            # NHWC -> NW'C, W' = H * W
            inputs = tf.reshape(inputs, [-1, input_h_t * input_w, input_c])
            last_w = channels[0]
        else:
            inputs = tf.reshape(inputs, [-1, input_h_t, input_w * input_c])
            last_w = input_w * input_c

        downsample_input_len = self.input_len
        with tf.variable_scope('tdnn'):
            x = tf.identity(inputs)
            for index in range(layer_num):
                unit_name = 'unit-' + str(index + 1)
                with tf.variable_scope(unit_name):
                    tdnn_name = 'tdnn-' + str(index + 1)
                    x = common_layers.tdnn(x,
                                           tdnn_name,
                                           last_w,
                                           tdnn_contexts[index],
                                           channels[index + 1],
                                           has_bias=True,
                                           method=tdnn_method)
                    last_w = channels[index + 1]
                    x = tf.nn.relu(x)
                    if self.netconf['use_bn']:
                        bn_name = 'bn' + str(index + 1)
                        x = tf.layers.batch_normalization(x,
                                                          axis=-1,
                                                          momentum=0.9,
                                                          training=self.train,
                                                          name=bn_name)
                    if self.netconf['use_dropout']:
                        x = tf.layers.dropout(x,
                                              self.netconf['dropout_rate'],
                                              training=self.train)
                    # TDNN layers keep the time resolution, so the input
                    # length is passed through unchanged.
                    downsample_input_len = downsample_input_len

        return x, downsample_input_len
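Like the ResNet example above, tdnn_block() is driven by self.netconf. A hypothetical fragment consistent with the keys read in this snippet (values are illustrative only):

netconf = {
    'tdnn_method': 'splice_layer',   # optional key; 'splice_op' and 'conv1d' are also handled
    'tdnn_contexts': [2, 2, 1],      # one symmetric context per TDNN layer
    'tdnn_dims': [256, 256, 256],    # output dimension of each TDNN layer
    'use_bn': True,
    'use_dropout': True,
    'dropout_rate': 0.1,
}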
Example #4
    def conv_block(self, inputs, depthwise=False):
        ''' 2D conv layers. '''
        filters = self.netconf['filters']
        logging.info("filters : {}".format(filters))
        filters_size = self.netconf['filter_size']
        logging.info("filters_size : {}".format(filters_size))
        filters_strides = self.netconf['filter_stride']
        logging.info("filters_strides : {}".format(filters_strides))
        pools_size = self.netconf['pool_size']
        logging.info("pools_size : {}".format(pools_size))

        layer_num = len(filters)
        assert layer_num == len(filters_size)
        assert layer_num == len(filters_strides)
        assert layer_num == len(pools_size)

        channels = [self.input_channels] + filters
        logging.info("channels : {}".format(channels))

        downsample_input_len = self.input_len
        with tf.variable_scope('cnn'):
            x = tf.identity(inputs)
            for index, filt in enumerate(filters):
                unit_name = 'unit-' + str(index + 1)
                with tf.variable_scope(unit_name):
                    if depthwise:
                        x = tf.layers.separable_conv2d(
                            x,
                            filters=filt,
                            kernel_size=filters_size[index],
                            strides=filters_strides[index],
                            padding='same',
                            name=unit_name)
                    else:
                        cnn_name = 'cnn-' + str(index + 1)
                        x = common_layers.conv2d(x, cnn_name,
                                                 filters_size[index],
                                                 channels[index],
                                                 channels[index + 1],
                                                 filters_strides[index])
                    x = tf.nn.relu(x)
                    if self.netconf['use_bn']:
                        bn_name = 'bn' + str(index + 1)
                        x = tf.layers.batch_normalization(x,
                                                          axis=-1,
                                                          momentum=0.9,
                                                          training=self.train,
                                                          name=bn_name)
                    if self.netconf['use_dropout']:
                        x = tf.layers.dropout(x,
                                              self.netconf['dropout_rate'],
                                              training=self.train)
                    x = common_layers.max_pool(x, pools_size[index],
                                               pools_size[index])
                    downsample_input_len = downsample_input_len / pools_size[
                        index][0]

        return x, downsample_input_len
Example #5
    def apply_gradients(self, grads_tvars, global_step=None, name=None):
        self._grads, self._tvars = zip(*[(g, t) for g, t in grads_tvars
                                         if g is not None])

        # for manual gradient clipping
        if self._clip_thresh_var is not None:
            self._grads, self._grads_norm = tf.clip_by_global_norm(
                self._grads, self._clip_thresh_var)

        # Loosely adaptive gradient clipping, in case an exploding gradient ruins the statistics.
        if self._use_adapt_grad_clip:
            thresh = tf.cond(
                self._do_tune,
                lambda: tf.sqrt(self._stat_protect_fac * self._adapt_grad_clip_thresh**2),
                lambda: tf.to_float(tf.constant(LARGE_FLOAT_VAL)))
            self._grads, self._grads_norm = tf.clip_by_global_norm(
                self._grads, thresh)

        with tf.variable_scope("before_apply"):
            before_apply_op = self.before_apply()

        with tf.variable_scope("update_hyper"):
            with tf.control_dependencies([before_apply_op]):
                update_hyper_op = self.update_hyper_param()

        with tf.variable_scope("apply_updates"):
            with tf.control_dependencies([update_hyper_op]):

                # clip exploding gradient according to h_max
                if self._use_adapt_grad_clip:
                    thresh = tf.cond(
                        tf.greater(tf.global_norm(self._grads),
                                   self._adapt_grad_clip_thresh),
                        lambda: self._adapt_grad_clip_target_val,
                        lambda: tf.to_float(tf.constant(LARGE_FLOAT_VAL)))
                    self._grads, self._grads_norm = tf.clip_by_global_norm(
                        self._grads, thresh)

                apply_grad_op = self._optimizer.apply_gradients(
                    zip(self._grads, self._tvars), global_step, name)

        with tf.control_dependencies([apply_grad_op]):
            self._increment_global_step_op = tf.assign(self._global_step,
                                                       self._global_step + 1)

            self._adapt_grad_clip_thresh_op = \
              tf.assign(self._adapt_grad_clip_thresh, tf.sqrt(self._h_max) )
            self._adapt_grad_clip_target_val_op = \
              tf.assign(self._adapt_grad_clip_target_val, tf.sqrt(self._h_max) )
            # self._adapt_grad_clip_target_val_op = \
            #   tf.assign(self._adapt_grad_clip_target_val, tf.sqrt(tf.sqrt(self._h_max * self._h_min)))

        return tf.group(before_apply_op, update_hyper_op, apply_grad_op,
                        self._adapt_grad_clip_thresh_op,
                        self._adapt_grad_clip_target_val_op,
                        self._increment_global_step_op)
Example #6
    def lstm_layer(self, x):
        ''' LSTM layers. '''
        if self.netconf['use_lstm_layer']:
            with tf.variable_scope('lstm'):
                cell_fw = tf.contrib.rnn.BasicLSTMCell(
                    self.netconf['cell_num'], forget_bias=1.0)
                if self.netconf['use_dropout']:
                    cell_fw = tf.contrib.rnn.DropoutWrapper(
                        cell=cell_fw,
                        output_keep_prob=1 -
                        self.netconf['dropout_rate'] if self.train else 1.0)

                cell_bw = tf.contrib.rnn.BasicLSTMCell(
                    self.netconf['cell_num'], forget_bias=1.0)
                if self.netconf['use_dropout']:
                    cell_bw = tf.contrib.rnn.DropoutWrapper(
                        cell=cell_bw,
                        output_keep_prob=1 -
                        self.netconf['dropout_rate'] if self.train else 1.0)

                # Feed the inputs into the bidirectional LSTM cells and obtain the BRNN outputs.
                outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw=cell_fw,
                                                             cell_bw=cell_bw,
                                                             inputs=x,
                                                             dtype=tf.float32,
                                                             time_major=False,
                                                             scope='LSTM1')
        else:
            outputs = x
        return outputs
Example #7
 def preprocess(self, inputs):
     ''' Speech preprocessing. '''
     with tf.variable_scope('feature'):
         if self.input_type == 'samples':
             # FIXME: stub
             feats = None
         else:
             if 'cmvn_type' in self.audioconf:
                 cmvn_type = self.audioconf['cmvn_type']
             else:
                 cmvn_type = 'global'
             logging.info('cmvn_type: %s' % (cmvn_type))
             if cmvn_type == 'global':
                 self.mean, self.std = utils.load_cmvn(
                     self.audioconf['cmvn_path'])
                 feats = utils.apply_cmvn(inputs, self.mean, self.std)
             elif cmvn_type == 'local':
                 feats = utils.apply_local_cmvn(inputs)
             elif cmvn_type == 'sliding':
                 raise ValueError('cmvn_type %s not implemented yet.' %
                                  (cmvn_type))
             elif cmvn_type == 'none':
                 feats = inputs
             else:
                 raise ValueError('Error cmvn_type %s.' % (cmvn_type))
     return feats
Example #8
 def logits_layer(self, x):
     ''' output layers'''
     with tf.variable_scope('logits'):
         logits = common_layers.linear(
             x, 'logits-matmul',
             [self.netconf['hidden1'], self.taskconf['classes']['num']])
     return logits
Example #9
  def clip_gradients(self, grads_and_vars, clip_ratio):
    """Clip the gradients."""
    is_zip_obj = False
    if isinstance(grads_and_vars, zip):
      grads_and_vars = list(grads_and_vars)
      is_zip_obj = True

    with tf.variable_scope('grad'):
      for grad, var in grads_and_vars:
        if grad is not None:
          tf.summary.histogram(var.name[:-2], grad)
        else:
          logging.debug('%s gradient is None' % (var.name))

    # no clipping
    if not clip_ratio:
      if is_zip_obj:
        grads, variables = zip(*grads_and_vars)
        grads_and_vars = zip(grads, variables)
      return grads_and_vars

    gradients, variables = zip(*grads_and_vars)
    clipped, global_norm = tf.clip_by_global_norm(gradients, clip_ratio)
    grad_and_var_clipped = zip(clipped, variables)

    tf.summary.scalar('gradient/global_norm', global_norm)
    return grad_and_var_clipped
Example #10
 def __call__(self, **kwargs):
     name = kwargs.get('name')
     kwargs.pop('name')
     with tf.variable_scope(name):
         loss = self.call(**kwargs)
     summary.scalar(name, loss)
     return loss
Example #11
def conv2d(x,
           name,
           filter_size,
           in_channels,
           out_channels,
           strides,
           bias=True):
    """2D convolution."""
    with tf.variable_scope(name):
        kernel = tf.get_variable(
            name='DW',
            shape=[filter_size[0], filter_size[1], in_channels, out_channels],
            dtype=tf.float32,
            initializer=tf.initializers.glorot_uniform())
        if bias:
            b = tf.get_variable(name='bias',
                                shape=[out_channels],
                                dtype=tf.float32,
                                initializer=tf.constant_initializer(0.0))
        out = tf.nn.conv2d(x,
                           kernel, [1, strides[0], strides[1], 1],
                           padding='SAME')
        if bias:
            out = tf.nn.bias_add(out, b)
        return out
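A minimal, hypothetical usage sketch for the conv2d() helper above, assuming TF 1.x graph mode with import tensorflow as tf; the placeholder shape and channel counts are made up for illustration:

images = tf.placeholder(tf.float32, [None, 128, 40, 1], name='images')  # NHWC
with tf.variable_scope('frontend'):
    # 3x3 kernel, 1 -> 32 channels, stride (1, 1), 'SAME' padding as defined above
    feats = conv2d(images, 'conv1', (3, 3), 1, 32, (1, 1), bias=True)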
Example #12
def extract_feature(waveforms, params):
    '''Extract fbank features with delta-deltas and apply CMVN.
    waveforms: [batch, samples]
    '''
    p = params
    with tf.variable_scope('feature_extractor'):
        mel_fbanks = extract_logfbank_with_delta(waveforms, params)
        # shape: [1, nframes, nbins, nchannels]
        fbank_size = utils.shape_list(mel_fbanks)
        #assert fbank_size[0] == 1

        # This replaces CMVN estimation on data
        if not p.audio_global_cmvn:
            mean = tf.reduce_mean(mel_fbanks, keepdims=True, axis=1)
            variance = tf.reduce_mean(tf.square(mel_fbanks - mean),
                                      keepdims=True,
                                      axis=1)
        else:
            assert p.audio_cmvn_path, p.audio_cmvn_path
            mean, variance = utils.load_cmvn(p.audio_cmvn_path)

        var_epsilon = 1e-09
        mel_fbanks = utils.apply_cmvn(mel_fbanks, mean, variance, var_epsilon)

        # Later models like to flatten the two spatial dims. Instead, we add a
        # unit spatial dim and flatten the frequencies and channels.
        batch_size = fbank_size[0]
        feats = tf.concat([
            tf.reshape(
                mel_fbanks,
                [batch_size, fbank_size[1], fbank_size[2], fbank_size[3]]),
            tf.zeros((batch_size, p.num_zeropad_frames, fbank_size[2],
                      fbank_size[3]))
        ], 1)
    return feats  # shape [batch_size, nframes, feature_size, channels]
Example #13
def embedding_look_up(text_inputs, vocab_size, embedding_size):
  """Embedding layer."""
  with tf.variable_scope("embedding"):
    W = tf.get_variable(
        name='W',
        initializer=tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0))
    embedding_chars = tf.nn.embedding_lookup(W, text_inputs)
    embedding_chars_expanded = tf.expand_dims(embedding_chars, -1)
  return embedding_chars_expanded
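A minimal, hypothetical usage sketch for embedding_look_up() above; the vocabulary size and sequence length are illustrative:

token_ids = tf.placeholder(tf.int32, [None, 50], name='token_ids')       # [batch, seq_len]
emb = embedding_look_up(token_ids, vocab_size=5000, embedding_size=128)  # [batch, 50, 128, 1]
# The trailing unit channel makes the output directly consumable by a 2-D
# conv/pool block such as conv_pool() shown further down this page.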
Example #14
    def call(self, features, **kwargs):
        self.train = kwargs['training']
        feats = tf.identity(features['inputs'], name='feats')
        texts = features['texts']

        with tf.variable_scope('model', reuse=tf.AUTO_REUSE):
            feats, texts = self.preprocess(feats, texts)
            logits = self.model(feats, texts)
        return logits
Example #15
def tdnn(x,
         name,
         in_dim,
         context,
         out_dim,
         has_bias=True,
         method='splice_layer'):
    '''
  TDNN implementation.

  Args:
    context:
      an int giving the symmetric left/right context, or
      a list of context offsets, e.g. (-2, 0, 2).
    method:
      splice_layer: use column-first patch-based copy.
      splice_op: use row-first while_loop copy.
      conv1d: use conv1d as a TDNN equivalent.
  '''
    if hasattr(context, '__iter__'):
        context_size = len(context)
        if method in ('splice_op', 'conv1d'):
            msg = 'Methods splice_op and conv1d do not support a context list.'
            raise ValueError(msg)
        context_list = context
    else:
        context_size = context * 2 + 1
        context_list = range(-context, context + 1)
    with tf.variable_scope(name):
        if method == 'splice_layer':
            x = splice_layer(x, 'splice', context_list)
            x = linear(x,
                       'linear', [in_dim * context_size, out_dim],
                       has_bias=has_bias)
        elif method == 'splice_op':
            x = speech_ops.splice(x, context, context)
            x = linear(x,
                       'linear', [in_dim * context_size, out_dim],
                       has_bias=has_bias)
        elif method == 'conv1d':
            kernel = tf.get_variable(
                name='DW',
                shape=[context, in_dim, out_dim],
                dtype=tf.float32,
                initializer=tf.glorot_uniform_initializer())
            x = tf.nn.conv1d(x, kernel, stride=1, padding='SAME')
            if has_bias:
                b = tf.get_variable(name='bias',
                                    shape=[out_dim],
                                    dtype=tf.float32,
                                    initializer=tf.constant_initializer(0.0))
                x = tf.nn.bias_add(x, b)
        else:
            raise ValueError('Unsupported method: %s.' % (method))
        return x
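A minimal, hypothetical usage sketch for tdnn() above, assuming the companion splice_layer() and linear() helpers shown elsewhere on this page are in scope; shapes are illustrative:

frames = tf.placeholder(tf.float32, [None, 200, 40])  # [batch, time, in_dim]
with tf.variable_scope('tdnn_demo'):
    # symmetric context of 2 (frames -2 .. +2), 40 -> 256 dims per frame
    out = tdnn(frames, 'tdnn-1', 40, 2, 256, has_bias=True, method='splice_layer')
# out: [batch, 200, 256]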
Example #16
 def text_layer(self, x, input_text):
     ''' text embedding layers '''
     with tf.variable_scope('text'):
         embedding_chars_expanded = common_layers.embedding_look_up(
             input_text, self.vocab_size, self.netconf['embedding_dim'])
         h_pool_flat = common_layers.conv_pool(
             embedding_chars_expanded,
             list(map(int, self.netconf['filter_sizes'])),
             self.netconf['embedding_dim'], self.netconf['num_filters'],
             input_text.shape[1])
         outputs = tf.concat((x, h_pool_flat), axis=1)
     return outputs
Example #17
  def get_eval_hooks(self, labels, logits):
    ''' labels: [batch]
        logits: [batch, num_classes]
    '''
    eval_hooks = []
    metric_tensor = {}
    with tf.variable_scope('metrics'):
      true_label = labels
      softmax = tf.nn.softmax(logits)
      pred_label = tf.argmax(softmax, -1)
      eval_metrics_ops = {
          'accuracy':
              tf.metrics.accuracy(
                  labels=true_label, predictions=pred_label, weights=None),
          'auc':
              tf.metrics.auc(
                  labels=true_label,
                  predictions=softmax[:, -1],
                  num_thresholds=20,
                  curve='ROC',
                  summation_method='trapezoidal'),
          'precision':
              tf.metrics.precision(
                  labels=true_label, predictions=pred_label, weights=None),
          'recall':
              tf.metrics.recall(
                  labels=true_label, predictions=pred_label, weights=None),
          'tp':
              tf.metrics.true_positives(
                  labels=true_label, predictions=pred_label, weights=None),
          'fn':
              tf.metrics.false_negatives(
                  labels=true_label, predictions=pred_label, weights=None),
          'fp':
              tf.metrics.false_positives(
                  labels=true_label, predictions=pred_label, weights=None),
          'tn':
              tf.metrics.true_negatives(
                  labels=true_label, predictions=pred_label, weights=None),
      }

    metric_tensor.update({key: val[0] for key, val in eval_metrics_ops.items()})
    metric_hook = tf.train.LoggingTensorHook(
        tensors=metric_tensor,
        every_n_iter=10000,
        every_n_secs=None,
        at_end=False,
        formatter=None)
    eval_hooks.append(metric_hook)
    return eval_hooks, eval_metrics_ops
Example #18
    def call(self, features, **kwargs):
        ''' Implementation of __call__(). '''
        self.train = kwargs['training']
        feats = tf.identity(features['inputs'], name='feats')
        logging.info(features)
        if 'labels' in features:
            labels = features['labels']
        else:
            # serving export mode
            labels = None

        with tf.variable_scope('model', reuse=tf.AUTO_REUSE):
            feats = self.preprocess(feats)
            logits = self.model(feats, labels)
        return logits
Example #19
def linear(x, names, shapes, has_bias=True):
    """Linear Layer."""
    assert len(shapes) == 2
    with tf.variable_scope(names):
        weights = tf.get_variable(name='weights',
                                  shape=shapes,
                                  initializer=tf.initializers.glorot_uniform())
        if has_bias:
            bias = tf.get_variable(
                name='bias',
                shape=shapes[1],
                initializer=tf.initializers.glorot_uniform())
            return tf.matmul(x, weights) + bias
        else:
            return tf.matmul(x, weights)
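A minimal, hypothetical usage sketch for linear() above; the dimensions are illustrative:

x = tf.placeholder(tf.float32, [None, 512])     # [batch, in_dim]
y = linear(x, 'fc1', [512, 10], has_bias=True)  # [batch, 10]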
Example #20
 def dense_layer(self, x):
     ''' fc layers'''
     with tf.variable_scope('dense'):
         shape = x.shape[-1].value
         y = common_layers.linear(x, 'dense-matmul',
                                  [shape, self.netconf['hidden1']])
         if self.netconf['use_bn']:
             y = tf.layers.batch_normalization(y,
                                               axis=-1,
                                               momentum=0.99,
                                               training=self.train,
                                               name='dense-bn')
         y = tf.nn.relu6(y)
         if self.netconf['use_dropout']:
             y = tf.layers.dropout(y,
                                   self.netconf['dropout_rate'],
                                   training=self.train)
     return y
Example #21
 def linear_block(self, x):
     '''
 linear layer for dim reduction
 x: shape [batch, time, feat, channel]
 output: shape [b, t, f]
 '''
     with tf.variable_scope('linear'):
         times, feat, channel = x.shape.as_list()[1:]
         x = tf.reshape(x, [-1, feat * channel])
         if self.netconf['use_dropout']:
             x = tf.layers.dropout(x,
                                   self.netconf['dropout_rate'],
                                   training=self.train)
         x = common_layers.linear(
             x, 'linear1', [feat * channel, self.netconf['linear_num']])
         #x = tf.nn.relu6(x)
         x = tf.reshape(x, [-1, times, self.netconf['linear_num']])
     return x
Example #22
def splice_layer(x, name, context):
  '''
  Splice a tensor along the last dimension with context.
  e.g.:
  t = [[[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]]]
  splice_tensor(t, [0, 1]) =
      [[[1, 2, 3, 4, 5, 6],
        [4, 5, 6, 7, 8, 9],
        [7, 8, 9, 7, 8, 9]]]

  Args:
    tensor: a tf.Tensor with shape (B, T, D) a.k.a. (N, H, W)
    context: a list of context offsets

  Returns:
    spliced tensor with shape (..., D * len(context))
  '''
  with tf.variable_scope(name):
    input_shape = tf.shape(x)
    B, T = input_shape[0], input_shape[1]
    context_len = len(context)
    array = tf.TensorArray(x.dtype, size=context_len)
    for idx, offset in enumerate(context):
      begin = offset
      end = T + offset
      if begin < 0:
        begin = 0
        sliced = x[:, begin:end, :]
        tiled = tf.tile(x[:, 0:1, :], [1, abs(offset), 1])
        final = tf.concat((tiled, sliced), axis=1)
      else:
        end = T
        sliced = x[:, begin:end, :]
        tiled = tf.tile(x[:, -1:, :], [1, abs(offset), 1])
        final = tf.concat((sliced, tiled), axis=1)
      array = array.write(idx, final)
    spliced = array.stack()
    spliced = tf.transpose(spliced, (1, 2, 0, 3))
    spliced = tf.reshape(spliced, (B, T, -1))
  return spliced
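A minimal, hypothetical check of splice_layer() against the example in its own docstring (TF 1.x graph mode):

t = tf.constant([[[1., 2., 3.],
                  [4., 5., 6.],
                  [7., 8., 9.]]])                 # shape (1, 3, 3)
spliced = splice_layer(t, 'splice_demo', [0, 1])  # shape (1, 3, 6)
with tf.Session() as sess:
    print(sess.run(spliced))
# [[[1. 2. 3. 4. 5. 6.]
#   [4. 5. 6. 7. 8. 9.]
#   [7. 8. 9. 7. 8. 9.]]]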
Example #23
 def _build_attention(self, enc_outputs, enc_seq_len):
   with tf.variable_scope("AttentionMechanism"):
     if self.attn_Type == 'bahdanau':
       attention_mechanism = seq2seq.BahdanauAttention(
           num_units=2 * self.cell_dim,
           memory=enc_outputs,
           memory_sequence_length=enc_seq_len,
           probability_fn=tf.nn.softmax,
           normalize=True,
           dtype=tf.get_variable_scope().dtype)
     elif self.params['attention_type'] == 'luong':
       attention_mechanism = seq2seq.LuongAttention(
           num_units=2 * self.cell_dim,
           memory=enc_outputs,
           memory_sequence_length=enc_seq_len,
           probability_fn=tf.nn.softmax,
           dtype=tf.get_variable_scope().dtype)
     else:
       raise ValueError('Unknown Attention Type')
     return attention_mechanism
Example #24
def conv_pool(embedded_chars_expanded, filter_sizes, embedding_size,
              num_filters, sequence_length):
    """
    Text convolution and max pooling to get a one-dimensional vector representation of the text.
    :param filter_sizes: list of convolution filter heights
    :return: flattened pooled features, shape [batch, num_filters * len(filter_sizes)]
    """
    pooled_outputs = []
    for _, filter_size in enumerate(filter_sizes):
        with tf.variable_scope("conv-maxpool-%s" % filter_size):
            # Convolution Layer
            filter_shape = [filter_size, embedding_size, 1, num_filters]
            W = tf.get_variable(name='W',
                                initializer=tf.truncated_normal(filter_shape,
                                                                stddev=0.1))
            b = tf.get_variable(name='b',
                                initializer=tf.constant(0.1,
                                                        shape=[num_filters]))
            conv = tf.nn.conv2d(embedded_chars_expanded,
                                W,
                                strides=[1, 1, 1, 1],
                                padding="VALID",
                                name="conv")
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            # Maxpooling over the outputs
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, sequence_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="pool")
            pooled_outputs.append(pooled)
    # Combine all the pooled features
    num_filters_total = num_filters * len(filter_sizes)

    h_pool = tf.concat(pooled_outputs, 3)

    h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])
    return h_pool_flat
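A minimal, hypothetical usage sketch chaining embedding_look_up() (shown earlier on this page) into conv_pool(); all sizes are illustrative:

ids = tf.placeholder(tf.int32, [None, 50])                          # [batch, seq_len]
emb = embedding_look_up(ids, vocab_size=5000, embedding_size=128)   # [batch, 50, 128, 1]
text_vec = conv_pool(emb,
                     filter_sizes=[3, 4, 5],
                     embedding_size=128,
                     num_filters=64,
                     sequence_length=50)                            # [batch, 3 * 64]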
Example #25
    def linear_block(self, x):
        '''
    linear layer for dim reduction
    x: shape [batch, time, feat, channel]
    output: shape [b, t, f]
    '''
        batch_t = tf.shape(x)[0]
        time_t = tf.shape(x)[1]
        feat, channel = x.shape.as_list()[2:]
        linear_num = self.netconf['linear_num']

        if linear_num > 0:
            with tf.variable_scope('linear'):
                x = tf.reshape(x, [batch_t * time_t, feat * channel])

                if self.netconf['use_dropout']:
                    x = tf.layers.dropout(x,
                                          self.netconf['dropout_rate'],
                                          training=self.train)

                x = common_layers.linear(x, 'linear1',
                                         [feat * channel, linear_num])

                x = tf.nn.relu(x)

                if self.netconf['use_bn']:
                    bn_name = 'bn_linear'
                    x = tf.layers.batch_normalization(x,
                                                      axis=-1,
                                                      momentum=0.9,
                                                      training=self.train,
                                                      name=bn_name)

                x = tf.reshape(x, [batch_t, time_t, linear_num])
        else:
            logging.info('linear_num <= 0, only apply reshape.')
            x = tf.reshape(x, [batch_t, time_t, feat * channel])

        return x
Example #26
    def dense_layer(self, x):
        ''' Embedding layers. '''
        with tf.variable_scope('dense'):
            shape = x.shape[-1].value
            hidden_dims = self.netconf['hidden_dims']
            y = x
            use_bn = self.netconf['use_bn']
            remove_nonlin = self.netconf['remove_last_nonlinearity']

            for idx, hidden in enumerate(hidden_dims):
                last_layer = idx == (len(hidden_dims) - 1)
                layer_add_nonlin = not last_layer or not remove_nonlin
                y = common_layers.linear(y,
                                         'dense-matmul-%d' % (idx + 1),
                                         [shape, hidden],
                                         has_bias=(layer_add_nonlin
                                                   or not use_bn))
                shape = hidden
                embedding = y
                if layer_add_nonlin:
                    y = tf.nn.relu(y)
                if use_bn:
                    y = tf.layers.batch_normalization(y,
                                                      axis=-1,
                                                      momentum=0.99,
                                                      training=self.train,
                                                      name='dense-bn-%d' %
                                                      (idx + 1))
                if self.netconf['use_dropout'] and layer_add_nonlin:
                    y = tf.layers.dropout(y,
                                          self.netconf['dropout_rate'],
                                          training=self.train)
            if self.netconf['embedding_after_linear']:
                logging.info('Output embedding right after linear layer.')
            else:
                logging.info(
                    'Output embedding after non-lin, batch norm and dropout.')
                embedding = y
        return embedding, y
Example #27
    def preprocess(self, inputs, input_text):
        ''' preprocess speech and text inputs
    params:
      inputs: speech input
      input_text: text input
    '''
        with tf.variable_scope('feature'):
            if self.input_type == 'samples':
                # speech feature config
                self.hp = speech_params(
                    sr=self.taskconf['audio']['sr'],
                    bins=self.audioconf['feature_size'],
                    dither=self.train,
                    use_delta_deltas=self.audioconf['add_delta_deltas'],
                    cmvn=self.audioconf['cmvn'],
                    cmvn_path=self.audioconf['cmvn_path'])

                feats = extract_feature(inputs, params=self.hp)
            else:
                self.mean, self.std = utils.load_cmvn(
                    self.audioconf['cmvn_path'])
                feats = utils.apply_cmvn(inputs, self.mean, self.std)
        return feats, input_text
Example #28
    def logits_layer(self, x, labels):
        ''' Logits layer to further produce softmax. '''
        if labels is None:
            # serving export mode, no need for logits
            return x

        output_num = self.taskconf['classes']['num']
        logits_type = self.netconf['logits_type']
        logits_shape = [x.shape[-1].value, output_num]

        with tf.variable_scope('logits'):
            init_type = self.netconf['logits_weight_init']['type']
            if init_type == 'truncated_normal':
                stddev = self.netconf['logits_weight_init']['stddev']
                init = tf.truncated_normal_initializer(stddev=stddev)
            elif init_type == 'xavier_uniform':
                init = tf.contrib.layers.xavier_initializer(uniform=True)
            elif init_type == 'xavier_norm':
                init = tf.contrib.layers.xavier_initializer(uniform=False)
            else:
                raise ValueError('Unsupported weight init type: %s' %
                                 (init_type))

            weights = tf.get_variable(name='weights',
                                      shape=logits_shape,
                                      initializer=init)

            if logits_type == 'linear':
                bias = tf.get_variable(
                    name='bias',
                    shape=logits_shape[1],
                    initializer=tf.constant_initializer(0.0))
                return tf.matmul(x, weights) + bias
            elif logits_type == 'linear_no_bias':
                return tf.matmul(x, weights)
            elif logits_type == 'arcface':
                return self.arcface_layer(x, labels, output_num, weights)
Example #29
 def pooling_layer(self, x, time_len):
     ''' pooling layer'''
     with tf.variable_scope('time_pooling'):
         if self.attention:
             x, self.alphas = common_layers.attention(
                 x, self.netconf['attention_size'], return_alphas=True)
             #alphas shape [batch, time, 1] -> [1, batch, time, 1]-> [1, time, batch, 1]
             tf.summary.image(
                 'alignment',
                 tf.transpose(tf.expand_dims(self.alphas, 0), [0, 2, 1, 3]))
         else:
             if self.netconf['use_lstm_layer']:
                 x = tf.concat(x, 2)
             # [batch, seq_len, dim, 1]
             x = tf.expand_dims(x, axis=-1)
             seq_len = time_len
             x = common_layers.max_pool(x,
                                        ksize=[seq_len, 1],
                                        strides=[seq_len, 1])
             if self.netconf['use_lstm_layer']:
                 x = tf.reshape(x, [-1, 2 * self.netconf['cell_num']])
             else:
                 x = tf.reshape(x, [-1, self.netconf['linear_num']])
         return x
Example #30
def arcface_loss(embedding,
                 labels,
                 out_num,
                 weights=None,
                 s=64.,
                 m=0.5,
                 limit_to_pi=True):
    '''
  https://github.com/auroua/InsightFace_TF/blob/master/losses/face_losses.py
  :param embedding: the input embedding vectors
  :param labels:  the input labels, the shape should be eg: (batch_size, 1)
  :param s: scalar value default is 64
  :param out_num: output class num
  :param weights: a tf.variable with shape (embedding.shape[-1], out_num)
                  or None to make a new one internally. default = None
  :param m: the margin value, default is 0.5
  :return: the final calculated output; this output is fed into tf.nn.softmax directly
  '''
    cos_m = math.cos(m)
    sin_m = math.sin(m)
    mm = sin_m * m  # issue 1
    threshold = math.cos(math.pi - m)
    with tf.variable_scope('arcface_loss'):
        # inputs and weights norm
        embedding_norm = tf.norm(embedding, axis=1, keep_dims=True)
        embedding = tf.div(embedding, embedding_norm, name='norm_embedding')
        if weights is None:
            weights = tf.get_variable(
                name='weights',
                shape=[embedding.shape[-1].value, out_num],
                initializer=tf.initializers.glorot_uniform())
        weights_norm = tf.norm(weights, axis=0, keep_dims=True)
        weights = tf.div(weights, weights_norm, name='norm_weights')
        # cos(theta+m)
        cos_t = tf.matmul(embedding, weights, name='cos_t')
        cos_t2 = tf.square(cos_t, name='cos_2')
        sin_t2 = tf.subtract(1., cos_t2, name='sin_2')
        sin_t = tf.sqrt(sin_t2, name='sin_t')
        cos_mt = s * tf.subtract(tf.multiply(cos_t, cos_m),
                                 tf.multiply(sin_t, sin_m),
                                 name='cos_mt')

        if limit_to_pi:
            # this condition controls the theta+m should in range [0, pi]
            #      0<=theta+m<=pi
            #     -m<=theta<=pi-m
            cond_v = cos_t - threshold
            cond = tf.cast(tf.nn.relu(cond_v, name='if_else'), dtype=tf.bool)

            keep_val = s * (cos_t - mm)
            cos_mt_temp = tf.where(cond, cos_mt, keep_val)
        else:
            cos_mt_temp = cos_mt

        mask = tf.one_hot(labels, depth=out_num, name='one_hot_mask')
        # mask = tf.squeeze(mask, 1)
        inv_mask = tf.subtract(1., mask, name='inverse_mask')

        s_cos_t = tf.multiply(s, cos_t, name='scalar_cos_t')

        output = tf.add(tf.multiply(s_cos_t, inv_mask),
                        tf.multiply(cos_mt_temp, mask),
                        name='arcface_loss_output')
    return output
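A minimal, hypothetical usage sketch for arcface_loss() above: the margin-scaled logits it returns are typically fed into a standard softmax cross-entropy (shapes and class count are illustrative):

embeddings = tf.placeholder(tf.float32, [None, 512])   # embedding vectors, L2-normalised inside
labels = tf.placeholder(tf.int32, [None])               # class ids, [batch]
logits = arcface_loss(embeddings, labels, out_num=1000, s=64., m=0.5)
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))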