import logging
import pickle

import numpy as np
import tensorflow as tf

import delta.layers


def get_pos_embedding_matrix(max_len, embed_dim, use_const, name):
  """Generate a position embedding matrix.

  Two variants are supported: a constant (untrainable) sinusoidal
  encoding and a trainable embedding.

  Args:
    max_len: maximum sequence length.
    embed_dim: embedding dimension.
    use_const: if True, build the constant sinusoidal encoding;
      otherwise create a trainable variable.
    name: variable name used for the trainable variant.

  Returns:
    pos_embed: [1, max_len, embed_dim]
  """
  if use_const:
    # First part of the PE function: the sin/cos argument
    # pos / 10000^(2i / embed_dim).
    pos_embed = np.array([[
        pos / np.power(10000, (i - i % 2) / embed_dim)
        for i in range(embed_dim)
    ] for pos in range(max_len)])
    # Second part: apply sine to even columns and cosine to odd ones.
    pos_embed[:, 0::2] = np.sin(pos_embed[:, 0::2])  # dim 2i
    pos_embed[:, 1::2] = np.cos(pos_embed[:, 1::2])  # dim 2i+1
    pos_embed = pos_embed[np.newaxis, ...]
    pos_embed = tf.cast(pos_embed, dtype=tf.float32)
  else:
    pos_embed = tf.get_variable(
        name=name,
        shape=[max_len, embed_dim],
        initializer=tf.random_uniform_initializer(-0.1, 0.1))
    pos_embed = tf.expand_dims(pos_embed, 0)
  return pos_embed
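# Usage sketch (illustrative; `token_ids` and the sizes below are assumptions,
# not taken from the original module). The constant variant returns a
# [1, max_len, embed_dim] tensor whose leading axis broadcasts over the batch
# when added to token embeddings:
#
#   token_ids = tf.constant([[3, 7, 2, 0]])               # [batch=1, time=4]
#   embed = tf.keras.layers.Embedding(100, 64)
#   embedded = embed(token_ids)                           # [1, 4, 64]
#   pos_embed = get_pos_embedding_matrix(4, 64, True, "pos_embed")
#   embedded = embedded + pos_embed                       # broadcasts on batch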
def __init__(self, config, **kwargs):
  super().__init__(**kwargs)
  self.config = config
  model_config = config['model']['net']['structure']

  # Task and vocabulary settings.
  self.num_classes = config['data']['task']['classes']['num']
  self.vocab_size = config['data']['task']['text']['vocab_size']
  self.max_text_len = config['data']['task']['text']['max_text_len']
  self.use_pretrained_embedding = config['model']['use_pre_train_emb']

  # Network hyperparameters.
  self.embedding_size = model_config['embedding_size']
  self.hidden_dim = model_config['hidden_dim']
  self.head_num = model_config['head_num']
  self.inner_size = model_config['inner_size']
  self.dropout_rate = model_config['dropout_rate']
  self.speech_dropout_rate = model_config['speech_dropout_rate']
  self.padding_id = model_config.get('padding_id', 0)
  self.speech_dense_act = config.get('speech_dense_act', 'relu')

  # Word embedding: loaded from a pretrained pickle or randomly initialized.
  if self.use_pretrained_embedding:
    self.embedding_path = config['model']['embedding_path']
    logging.info("Loading embedding file from: {}".format(self.embedding_path))
    with open(self.embedding_path, 'rb') as f:
      self._word_embedding_init = pickle.load(f)
    self.embed_initializer = tf.constant_initializer(self._word_embedding_init)
  else:
    self.embed_initializer = tf.random_uniform_initializer(-0.1, 0.1)
  self.embed = tf.keras.layers.Embedding(
      self.vocab_size,
      self.embedding_size,
      embeddings_initializer=self.embed_initializer)

  # Speech and text encoders, plus cross-modal alignment layers.
  self.speech_enc_layer = delta.layers.RnnEncoder(config, name="speech_encoder")
  self.text_enc_layer = delta.layers.RnnEncoder(config, name="text_encoder")
  self.align_attn_layer = delta.layers.MultiHeadAttention(
      self.hidden_dim, self.head_num)
  self.align_enc_layer = delta.layers.RnnAttentionEncoder(
      config, name="align_encoder")

  # Dropout layers.
  self.embed_d = tf.keras.layers.Dropout(self.dropout_rate)
  self.speech_d = tf.keras.layers.Dropout(self.speech_dropout_rate)
  self.speech_enc_d = tf.keras.layers.Dropout(self.speech_dropout_rate)
  self.text_enc_d = tf.keras.layers.Dropout(self.dropout_rate)
  self.attn_enc_d = tf.keras.layers.Dropout(self.dropout_rate)
  self.align_enc_d = tf.keras.layers.Dropout(self.dropout_rate)

  # Output projection and normalization.
  self.final_dense = tf.keras.layers.Dense(
      self.num_classes, activation=tf.keras.activations.linear)
  self.align_layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-6)
  self.speech_dense = tf.keras.layers.Dense(
      512, activation=self.speech_dense_act)
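# Illustrative config fragment (the YAML layout below is inferred from the
# keys read above; it is an assumption, not a verified schema):
#
#   data:
#     task:
#       classes:
#         num: 2
#       text:
#         vocab_size: 30000
#         max_text_len: 100
#   model:
#     use_pre_train_emb: false
#     net:
#       structure:
#         embedding_size: 300
#         hidden_dim: 512
#         head_num: 8
#         inner_size: 1024
#         dropout_rate: 0.1
#         speech_dropout_rate: 0.1
#         padding_id: 0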
def __init__(self, config, **kwargs):
  super().__init__(**kwargs)
  logging.info("Initialize MatchRnn...")

  # Word embedding initializer: pretrained matrix or random uniform.
  self.use_pretrained_embedding = config['model']['use_pre_train_emb']
  if self.use_pretrained_embedding:
    self.embedding_path = config['model']['embedding_path']
    logging.info("Loading embedding file from: {}".format(self.embedding_path))
    with open(self.embedding_path, 'rb') as f:
      self._word_embedding_init = pickle.load(f)
    self.embed_initializer = tf.constant_initializer(self._word_embedding_init)
  else:
    self.embed_initializer = tf.random_uniform_initializer(-0.1, 0.1)
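# The pretrained-embedding pickle is expected to deserialize to something
# tf.constant_initializer accepts, i.e. a [vocab_size, embedding_size]
# matrix. A minimal sketch of producing such a file (file name and values
# are illustrative assumptions):
#
#   import pickle
#   import numpy as np
#
#   emb = np.random.uniform(-0.1, 0.1, (30000, 300)).astype(np.float32)
#   with open('word_emb.pkl', 'wb') as f:
#     pickle.dump(emb, f)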
def __init__(self, config, **kwargs):
  super().__init__(**kwargs)
  logging.info("Initialize S2SModel")

  data_config = config['data']
  model_config = config['model']['net']['structure']
  self.use_label_vocab = data_config['task']['use_label_vocab']
  self.label_vocab_size = data_config['label_vocab_size']
  self.vocab_size = data_config['vocab_size']
  self.use_pretrained_embedding = config['model']['use_pre_train_emb']
  self.embedding_size = model_config['embedding_size']

  # Encoder embedding: pretrained or randomly initialized.
  if self.use_pretrained_embedding:
    self.embedding_path = config['model']['embedding_path']
    logging.info("Loading embedding file from: {}".format(self.embedding_path))
    with open(self.embedding_path, 'rb') as f:
      self._word_embedding_init = pickle.load(f)
    self.embed_initializer = tf.constant_initializer(self._word_embedding_init)
  else:
    self.embed_initializer = tf.random_uniform_initializer(-0.1, 0.1)
  self.embed = tf.keras.layers.Embedding(
      self.vocab_size,
      self.embedding_size,
      embeddings_initializer=self.embed_initializer)

  # Decoder embedding: the decoder vocabulary is either the label
  # vocabulary or the input vocabulary, and the embedding table can be
  # shared with the encoder.
  self.share_embedding = model_config['share_embedding']
  if self.use_label_vocab:
    self.decode_vocab_size = self.label_vocab_size
  else:
    self.decode_vocab_size = self.vocab_size
  if self.share_embedding:
    self.decoder_embed = self.embed
  else:
    self.decoder_embed = tf.keras.layers.Embedding(
        self.decode_vocab_size,
        self.embedding_size,
        embeddings_initializer=self.embed_initializer)
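# Note (an inference from the code above, not stated in the original):
# `share_embedding: true` reuses the encoder's embedding table for the
# decoder, which is only consistent when the decoder vocabulary matches the
# encoder vocabulary, i.e. `use_label_vocab: false` or a label vocab of the
# same size. An illustrative config fragment:
#
#   data:
#     vocab_size: 30000
#     label_vocab_size: 50
#     task:
#       use_label_vocab: true
#   model:
#     net:
#       structure:
#         embedding_size: 300
#         share_embedding: false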