def conv2d(x, name, filter_size, in_channels, out_channels, strides,
           bias=True):
  """2D convolution."""
  with tf.variable_scope(name):
    kernel = tf.get_variable(
        name='DW',
        shape=[filter_size[0], filter_size[1], in_channels, out_channels],
        dtype=tf.float32,
        initializer=tf.initializers.glorot_uniform())
    if bias:
      b = tf.get_variable(
          name='bias',
          shape=[out_channels],
          dtype=tf.float32,
          initializer=tf.constant_initializer(0.0))
    out = tf.nn.conv2d(
        x, kernel, [1, strides[0], strides[1], 1], padding='SAME')
    if bias:
      out = tf.nn.bias_add(out, b)
    return out
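# Usage sketch for conv2d (assumes TF 1.x graph mode and the conv2d
# definition above; all sizes are illustrative, not from the source).
import numpy as np
import tensorflow as tf

g = tf.Graph()
with g.as_default():
  # NHWC input: batch of 8 images, 32x32, 3 channels.
  images = tf.placeholder(tf.float32, shape=[8, 32, 32, 3])
  feats = conv2d(images, 'conv1', filter_size=(3, 3), in_channels=3,
                 out_channels=16, strides=(1, 1))
  with tf.Session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(feats, {images: np.zeros([8, 32, 32, 3], np.float32)})
# SAME padding with stride 1 preserves the spatial dims.
assert out.shape == (8, 32, 32, 16)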
def __init__(self, config, **kwargs):
  super().__init__(**kwargs)
  self.config = config
  model_config = config['model']['net']['structure']
  self.num_classes = config['data']['task']['classes']['num']
  self.vocab_size = config['data']['task']['text']['vocab_size']
  self.max_text_len = config['data']['task']['text']['max_text_len']
  self.use_pretrained_embedding = config['model']['use_pre_train_emb']
  self.embedding_size = model_config['embedding_size']
  self.hidden_dim = model_config['hidden_dim']
  self.head_num = model_config['head_num']
  self.inner_size = model_config['inner_size']
  self.dropout_rate = model_config['dropout_rate']
  self.speech_dropout_rate = model_config['speech_dropout_rate']
  self.padding_id = model_config.get('padding_id', 0)
  self.speech_dense_act = config.get('speech_dense_act', 'relu')

  if self.use_pretrained_embedding:
    self.embedding_path = config['model']['embedding_path']
    logging.info("Loading embedding file from: {}".format(
        self.embedding_path))
    self._word_embedding_init = pickle.load(open(self.embedding_path, 'rb'))
    self.embed_initializer = tf.constant_initializer(
        self._word_embedding_init)
  else:
    self.embed_initializer = tf.random_uniform_initializer(-0.1, 0.1)

  self.embed = tf.keras.layers.Embedding(
      self.vocab_size,
      self.embedding_size,
      embeddings_initializer=self.embed_initializer)

  self.speech_enc_layer = delta.layers.RnnEncoder(
      config, name="speech_encoder")
  self.text_enc_layer = delta.layers.RnnEncoder(config, name="text_encoder")
  self.align_attn_layer = delta.layers.MultiHeadAttention(
      self.hidden_dim, self.head_num)
  self.align_enc_layer = delta.layers.RnnAttentionEncoder(
      config, name="align_encoder")

  self.embed_d = tf.keras.layers.Dropout(self.dropout_rate)
  self.speech_d = tf.keras.layers.Dropout(self.speech_dropout_rate)
  self.speech_enc_d = tf.keras.layers.Dropout(self.speech_dropout_rate)
  self.text_enc_d = tf.keras.layers.Dropout(self.dropout_rate)
  self.attn_enc_d = tf.keras.layers.Dropout(self.dropout_rate)
  self.align_enc_d = tf.keras.layers.Dropout(self.dropout_rate)

  self.final_dense = tf.keras.layers.Dense(
      self.num_classes, activation=tf.keras.activations.linear)
  self.align_layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-6)
  self.speech_dense = tf.keras.layers.Dense(
      512, activation=self.speech_dense_act)
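# Config fragment this constructor reads (keys are taken from the code
# above; every value is illustrative, not from the source). embedding_path
# is only required when use_pre_train_emb is true.
config_fragment = {
    'data': {'task': {'classes': {'num': 4},
                      'text': {'vocab_size': 5000, 'max_text_len': 100}}},
    'model': {'use_pre_train_emb': False,
              'net': {'structure': {'embedding_size': 300,
                                    'hidden_dim': 512,
                                    'head_num': 8,
                                    'inner_size': 2048,
                                    'dropout_rate': 0.1,
                                    'speech_dropout_rate': 0.1,
                                    'padding_id': 0}}},
    'speech_dense_act': 'relu',  # read from the top-level config above
}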
def prelu_layer(self, x, name, num_parameters=1, init=0.25):
  """Parametric ReLU: max(0, x) + alpha * min(0, x) with learnable alpha.

  num_parameters=1 shares a single alpha; otherwise one alpha is learned
  per channel (the last dimension of x).
  """
  if num_parameters == 1:
    shape = 1
  else:
    shape = x.get_shape()[-1]
  alpha = tf.get_variable(
      name,
      shape=shape,
      dtype=x.dtype,
      initializer=tf.constant_initializer(init))
  return tf.maximum(0.0, x) + alpha * tf.minimum(0.0, x)
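# Sketch of the PReLU identity used above (assumes TF 1.x; alpha is fixed
# at its init value 0.25 here instead of being a learned variable, just to
# show the math).
import tensorflow as tf

x = tf.constant([-2.0, -0.5, 0.0, 1.5])
y = tf.maximum(0.0, x) + 0.25 * tf.minimum(0.0, x)
with tf.Session() as sess:
  print(sess.run(y))  # [-0.5, -0.125, 0.0, 1.5]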
def tdnn(x, name, in_dim, context, out_dim, has_bias=True,
         method='splice_layer'):
  '''TDNN implementation.

  Args:
    context: an int giving the symmetric left/right context, or a list of
      context offsets, e.g. (-2, 0, 2).
    method:
      splice_layer: use column-first patch-based copy.
      splice_op: use row-first while_loop copy.
      conv1d: use conv1d as a TDNN equivalent.
  '''
  if hasattr(context, '__iter__'):
    context_size = len(context)
    if method in ('splice_op', 'conv1d'):
      msg = 'Methods splice_op and conv1d do not support a context list.'
      raise ValueError(msg)
    context_list = context
  else:
    context_size = context * 2 + 1
    context_list = range(-context, context + 1)

  with tf.variable_scope(name):
    if method == 'splice_layer':
      x = splice_layer(x, 'splice', context_list)
      x = linear(x, 'linear', [in_dim * context_size, out_dim],
                 has_bias=has_bias)
    elif method == 'splice_op':
      x = speech_ops.splice(x, context, context)
      x = linear(x, 'linear', [in_dim * context_size, out_dim],
                 has_bias=has_bias)
    elif method == 'conv1d':
      kernel = tf.get_variable(
          name='DW',
          shape=[context, in_dim, out_dim],
          dtype=tf.float32,
          initializer=tf.glorot_uniform_initializer())
      x = tf.nn.conv1d(x, kernel, stride=1, padding='SAME')
      if has_bias:
        b = tf.get_variable(
            name='bias',
            shape=[out_dim],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.0))
        x = tf.nn.bias_add(x, b)
    else:
      raise ValueError('Unsupported method: %s.' % (method))
    return x
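# Usage sketch for tdnn with method='conv1d' (assumes TF 1.x; this branch
# needs no external helpers such as splice_layer or linear, so it runs
# with just the function above; shapes are illustrative). Note that in the
# conv1d branch, context is used directly as the kernel width.
import numpy as np
import tensorflow as tf

g = tf.Graph()
with g.as_default():
  # [batch, time, feature] input.
  frames = tf.placeholder(tf.float32, shape=[4, 100, 40])
  out_t = tdnn(frames, 'tdnn1', in_dim=40, context=5, out_dim=64,
               method='conv1d')
  with tf.Session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(out_t, {frames: np.zeros([4, 100, 40], np.float32)})
assert out.shape == (4, 100, 64)  # SAME padding keeps the time axis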
def __init__(self, max_len, embedding_dim, **kwargs):
  self.max_len = max_len
  self.embedding_dim = embedding_dim
  self.pos_embedding_matrix = self.get_pos_embedding_matrix(
      self.max_len, self.embedding_dim)
  embed_initializer = tf.constant_initializer(self.pos_embedding_matrix)
  self.pos_embedding_layer = tf.keras.layers.Embedding(
      *self.pos_embedding_matrix.shape,
      trainable=False,
      embeddings_initializer=embed_initializer)
  self.get_pos_layer = tf.keras.layers.Lambda(self.get_pos)
  self.mask_layer = tf.keras.layers.Lambda(self.mask_outputs)
  super().__init__(**kwargs)
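# get_pos_embedding_matrix is not shown above; a common choice is the
# fixed sinusoidal table from "Attention Is All You Need". This is a
# sketch under that assumption, not necessarily the source's exact code.
import numpy as np

def get_pos_embedding_matrix(max_len, embedding_dim):
  # angle(pos, i) = pos / 10000^(2*(i//2)/d); even dims take sin, odd cos.
  pos = np.arange(max_len)[:, np.newaxis].astype(np.float64)
  i = np.arange(embedding_dim)[np.newaxis, :].astype(np.float64)
  angle = pos / np.power(10000.0, 2 * (i // 2) / embedding_dim)
  table = np.zeros((max_len, embedding_dim))
  table[:, 0::2] = np.sin(angle[:, 0::2])
  table[:, 1::2] = np.cos(angle[:, 1::2])
  return table.astype(np.float32)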
def __init__(self, config, **kwargs):
  super().__init__(**kwargs)
  logging.info("Initialize MatchRnn...")
  self.use_pretrained_embedding = config['model']['use_pre_train_emb']
  if self.use_pretrained_embedding:
    self.embedding_path = config['model']['embedding_path']
    logging.info("Loading embedding file from: {}".format(
        self.embedding_path))
    self._word_embedding_init = pickle.load(open(self.embedding_path, 'rb'))
    self.embed_initializer = tf.constant_initializer(
        self._word_embedding_init)
  else:
    self.embed_initializer = tf.random_uniform_initializer(-0.1, 0.1)
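# Sketch of producing the pickle that use_pre_train_emb expects above.
# Assumption: the file holds a [vocab_size, embedding_size] float array,
# since that is what tf.constant_initializer receives; the path and sizes
# here are made up.
import pickle
import numpy as np

emb = np.random.uniform(-0.1, 0.1, size=(5000, 300)).astype(np.float32)
with open('embedding.pkl', 'wb') as f:
  pickle.dump(emb, f)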
def __init__(self, config, **kwargs):
  super().__init__(config, **kwargs)
  model_config = config['model']['net']['structure']
  self.dropout_rate = model_config['dropout_rate']
  self.sequence_length = config['data']['task']['max_seq_len']
  self.vocab_size = config['data']['vocab_size']
  self.num_classes = config['data']['task']['classes']['num_classes']
  self.embedding_size = model_config['embedding_size']
  self.num_units = model_config['num_units']
  self.num_layers = model_config['num_layers']
  self.filter_sizes = model_config['filter_sizes']
  self.num_filters = model_config['num_filters']
  self.l2_reg_lambda = model_config['l2_reg_lambda']

  self.embed = tf.keras.layers.Embedding(
      self.vocab_size,
      self.embedding_size,
      embeddings_initializer=self.embed_initializer)

  self.conv2ds = []
  self.pools = []
  for i, filter_size in enumerate(self.filter_sizes):
    conv2d = tf.keras.layers.Conv2D(
        filters=self.num_filters,
        kernel_size=(filter_size, self.embedding_size),
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
        bias_initializer=tf.constant_initializer(value=0.0),
        padding='valid',
        name='conv_{}'.format(i))
    pool = tf.keras.layers.MaxPool2D(
        pool_size=(self.sequence_length - filter_size + 1, 1),
        strides=(1, 1),
        padding='valid',
        name='pool_{}'.format(i))
    self.conv2ds.append(conv2d)
    self.pools.append(pool)

  self.flat = tf.keras.layers.Flatten()
  self.dense = tf.keras.layers.Dense(64, activation=tf.keras.activations.relu)
  self.dropout = tf.keras.layers.Dropout(rate=self.dropout_rate)
  self.final_dense = tf.keras.layers.Dense(
      self.num_classes, activation=tf.keras.activations.linear)
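# Shape sketch for one conv/pool branch above (Keras; sizes illustrative).
# A 'valid' conv of height filter_size over a [seq_len, embed, 1] "image"
# leaves seq_len - filter_size + 1 rows, so the matching MaxPool2D window
# collapses each filter map to a single value (max-over-time pooling).
import tensorflow as tf

seq_len, embed, n_filters, k = 50, 128, 100, 3
x = tf.zeros([2, seq_len, embed, 1])
conv = tf.keras.layers.Conv2D(n_filters, (k, embed), padding='valid')
pool = tf.keras.layers.MaxPool2D((seq_len - k + 1, 1), strides=(1, 1))
y = pool(conv(x))
print(y.shape)  # (2, 1, 1, 100)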
def logits_layer(self, x, labels):
  '''Logits layer, producing inputs for a downstream softmax.'''
  if labels is None:
    # Serving/export mode: no labels, so no logits needed.
    return x

  output_num = self.taskconf['classes']['num']
  logits_type = self.netconf['logits_type']
  logits_shape = [x.shape[-1].value, output_num]

  with tf.variable_scope('logits'):
    init_type = self.netconf['logits_weight_init']['type']
    if init_type == 'truncated_normal':
      stddev = self.netconf['logits_weight_init']['stddev']
      init = tf.truncated_normal_initializer(stddev=stddev)
    elif init_type == 'xavier_uniform':
      init = tf.contrib.layers.xavier_initializer(uniform=True)
    elif init_type == 'xavier_norm':
      init = tf.contrib.layers.xavier_initializer(uniform=False)
    else:
      raise ValueError('Unsupported weight init type: %s' % (init_type))

    weights = tf.get_variable(name='weights', shape=logits_shape,
                              initializer=init)

    if logits_type == 'linear':
      bias = tf.get_variable(
          name='bias',
          shape=logits_shape[1],
          initializer=tf.constant_initializer(0.0))
      return tf.matmul(x, weights) + bias
    elif logits_type == 'linear_no_bias':
      return tf.matmul(x, weights)
    elif logits_type == 'arcface':
      return self.arcface_layer(x, labels, output_num, weights)
    else:
      raise ValueError('Unsupported logits type: %s' % (logits_type))
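# Config fragment consumed by logits_layer (keys come from the code above;
# the values are illustrative, not from the source).
netconf = {
    'logits_type': 'linear',         # or 'linear_no_bias', 'arcface'
    'logits_weight_init': {
        'type': 'truncated_normal',  # or 'xavier_uniform', 'xavier_norm'
        'stddev': 0.02,
    },
}
taskconf = {'classes': {'num': 10}}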
def __init__(self, config, **kwargs):
  super().__init__(**kwargs)
  logging.info("Initialize S2SModel")
  data_config = config['data']
  model_config = config['model']['net']['structure']
  self.use_label_vocab = data_config['task']['use_label_vocab']
  self.label_vocab_size = data_config['label_vocab_size']
  self.vocab_size = data_config['vocab_size']
  self.use_pretrained_embedding = config['model']['use_pre_train_emb']
  self.embedding_size = model_config['embedding_size']

  if self.use_pretrained_embedding:
    self.embedding_path = config['model']['embedding_path']
    logging.info("Loading embedding file from: {}".format(
        self.embedding_path))
    self._word_embedding_init = pickle.load(open(self.embedding_path, 'rb'))
    self.embed_initializer = tf.constant_initializer(
        self._word_embedding_init)
  else:
    self.embed_initializer = tf.random_uniform_initializer(-0.1, 0.1)

  self.embed = tf.keras.layers.Embedding(
      self.vocab_size,
      self.embedding_size,
      embeddings_initializer=self.embed_initializer)

  self.share_embedding = model_config['share_embedding']
  if self.use_label_vocab:
    self.decode_vocab_size = self.label_vocab_size
  else:
    self.decode_vocab_size = self.vocab_size
  if self.share_embedding:
    self.decoder_embed = self.embed
  else:
    self.decoder_embed = tf.keras.layers.Embedding(
        self.decode_vocab_size,
        self.embedding_size,
        embeddings_initializer=self.embed_initializer)
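# S2S-specific config keys read above (values are illustrative, not from
# the source). When share_embedding is true, decoder_embed is the same
# layer object as embed, so encoder and decoder weights are tied;
# use_label_vocab sizes the decoder table by label_vocab_size instead of
# vocab_size.
s2s_config_fragment = {
    'data': {'vocab_size': 5000, 'label_vocab_size': 30,
             'task': {'use_label_vocab': False}},
    'model': {'use_pre_train_emb': False,
              'net': {'structure': {'embedding_size': 300,
                                    'share_embedding': True}}},
}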