def __init__(self, config): super(Transducer, self).__init__() # define encoder self.config = config self.encoder = build_encoder(config) # define decoder self.decoder = build_decoder(config) # define JointNet self.joint = JointNet( input_size=config.joint.input_size, inner_dim=config.joint.inner_size, vocab_size=config.vocab_size ) if config.share_embedding: assert self.decoder.embedding.weight.size() == self.joint.project_layer.weight.size(), '%d != %d' % (self.decoder.embedding.weight.size(1), self.joint.project_layer.weight.size(1)) self.joint.project_layer.weight = self.decoder.embedding.weight self.crit = RNNTLoss(blank=28)
def build_model(self):
    """Build the TF1 seq2seq graph.

    Creates placeholders, the shared embedding, the (optionally
    bidirectional) encoder, and a Bahdanau-attention decoder. Depending on
    ``self.mode`` it then adds either the training ops (teacher-forced
    decoding, masked sequence loss, clipped-gradient Adam step) or the
    inference ops (beam-search or greedy decoding). Finally creates
    ``self.saver`` over all global variables.
    """
    print('building model... ...')
    with tf.variable_scope('seq2seq_placeholder'):
        # All sequence placeholders are [batch, time]; lengths are [batch].
        self.encoder_inputs = tf.placeholder(tf.int32, [None, None], name="encoder_inputs")
        self.decoder_inputs = tf.placeholder(tf.int32, [None, None], name="decoder_inputs")
        self.decoder_targets = tf.placeholder(tf.int32, [None, None], name="decoder_targets")
        # float mask: used directly as per-token weights in sequence_loss.
        self.decoder_targets_masks = tf.placeholder(tf.float32, [None, None], name="mask")
        self.encoder_length = tf.placeholder(tf.int32, [None], name="encoder_length")
        self.decoder_length = tf.placeholder(tf.int32, [None], name="decoder_length")
        # Longest target in the batch caps dynamic_decode iterations.
        self.max_target_sequence_length = tf.reduce_max(
            self.decoder_length, name='max_target_len')

    with tf.variable_scope('seq2seq_embedding'):
        self.embedding = self.init_embedding(self.vocab_size, self.embedding_size)

    with tf.variable_scope('seq2seq_encoder'):
        encoder_outputs, encoder_states = build_encoder(
            self.embedding, self.encoder_inputs, self.encoder_length,
            self.enc_num_layers, self.enc_num_units, self.enc_cell_type,
            bidir=self.enc_bidir)

    with tf.variable_scope('seq2seq_decoder'):
        encoder_length = self.encoder_length
        if self.beam_search:
            print("use beamsearch decoding..")
            # Beam search needs every encoder tensor replicated beam_size
            # times along the batch axis.
            encoder_outputs = tile_batch(encoder_outputs, multiplier=self.beam_size)
            encoder_states = tile_batch(encoder_states, multiplier=self.beam_size)
            encoder_length = tile_batch(encoder_length, multiplier=self.beam_size)

        attention_mechanism = BahdanauAttention(
            num_units=self.attn_num_units,
            memory=encoder_outputs,
            memory_sequence_length=encoder_length)
        decoder_cell = create_rnn_cell(self.dec_num_layers,
                                       self.dec_num_units,
                                       self.dec_cell_type)
        decoder_cell = AttentionWrapper(
            cell=decoder_cell,
            attention_mechanism=attention_mechanism,
            attention_layer_size=self.dec_num_units,
            name='Attention_Wrapper')

        # Effective batch is batch_size * beam_size when beam search tiled
        # the memory above.
        batch_size = self.batch_size if not self.beam_search else self.batch_size * self.beam_size
        # Initialize the attention-wrapped cell from the encoder final state.
        decoder_initial_state = decoder_cell.zero_state(
            batch_size=batch_size,
            dtype=tf.float32).clone(cell_state=encoder_states)

        output_layer = tf.layers.Dense(self.vocab_size,
                                       use_bias=False,
                                       name='output_projection')

        if self.mode == 'train':
            decoder_inputs_embedded = tf.nn.embedding_lookup(
                self.embedding, self.decoder_inputs)
            # TrainingHelper feeds the ground-truth decoder input at each
            # timestep (teacher forcing) instead of the previous output.
            training_helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=decoder_inputs_embedded,
                sequence_length=self.decoder_length,
                name='training_helper')
            training_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=decoder_cell,
                helper=training_helper,
                initial_state=decoder_initial_state,
                output_layer=output_layer)
            decoder_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder=training_decoder,
                impute_finished=True,
                maximum_iterations=self.max_target_sequence_length)
            self.decoder_logits_train = decoder_outputs.rnn_output
            # Token-level cross entropy, weighted by the float mask.
            self.loss = tf.contrib.seq2seq.sequence_loss(
                logits=self.decoder_logits_train,
                targets=self.decoder_targets,
                weights=self.decoder_targets_masks)
            optimizer = tf.train.AdamOptimizer(self.learning_rate)
            trainable_params = tf.trainable_variables()
            gradients = tf.gradients(self.loss, trainable_params)
            # Global-norm clipping before applying the update.
            clip_gradients, _ = tf.clip_by_global_norm(
                gradients, self.max_gradient_norm)
            self.train_op = optimizer.apply_gradients(
                zip(clip_gradients, trainable_params))
        elif self.mode == 'infer':
            start_tokens = tf.ones([
                self.batch_size,
            ], tf.int32) * SOS_ID  # batch_size here does not need to be tiled
            end_token = EOS_ID
            if self.beam_search:
                inference_decoder = BeamSearchDecoder(
                    cell=decoder_cell,
                    embedding=self.embedding,
                    start_tokens=start_tokens,
                    end_token=end_token,
                    initial_state=decoder_initial_state,
                    beam_width=self.beam_size,
                    output_layer=output_layer)
            else:
                decoding_helper = GreedyEmbeddingHelper(
                    embedding=self.embedding,
                    start_tokens=start_tokens,
                    end_token=end_token)
                inference_decoder = BasicDecoder(
                    cell=decoder_cell,
                    helper=decoding_helper,
                    initial_state=decoder_initial_state,
                    output_layer=output_layer)
            decoder_outputs, _, _ = dynamic_decode(
                decoder=inference_decoder,
                maximum_iterations=self.infer_max_iter)
            if self.beam_search:
                infer_outputs = decoder_outputs.predicted_ids  # [batch_size, decoder_targets_length, beam_size]
                self.infer_outputs = tf.transpose(
                    infer_outputs, [0, 2, 1
                                    ])  # [batch_size, beam_size, decoder_targets_length]
            else:
                self.infer_outputs = decoder_outputs.sample_id  # [batch_size, decoder_targets_length]

    self.saver = tf.train.Saver(tf.global_variables(),
                                max_to_keep=self.max_to_keep)
def build_model(self):
    """Build the TF1 seq2seq graph for the ECM (Emotional Chatting Machine) model.

    Extends the plain attention seq2seq with: an emotion-category embedding,
    an internal emotion memory (read/write gates inside ``ECMWrapper``), and
    an external memory that mixes a generic-vocabulary softmax with an
    emotion-vocabulary softmax via a learned choice probability ``alpha``.
    Training mode adds the ECM loss (choice-weighted cross entropies plus
    alpha and internal-memory regularizers); infer mode adds ECM beam search.
    """
    print('building model... ...')
    with tf.variable_scope('seq2seq_placeholder'):
        self.encoder_inputs = tf.placeholder(tf.int32, [None, None], name="encoder_inputs")
        self.decoder_inputs = tf.placeholder(tf.int32, [None, None], name="decoder_inputs")
        self.decoder_targets = tf.placeholder(tf.int32, [None, None], name="decoder_targets")
        # boolean mask: applied later with tf.boolean_mask (not as weights).
        self.decoder_targets_masks = tf.placeholder(tf.bool, [None, None], name="mask")
        self.encoder_length = tf.placeholder(tf.int32, [None], name="encoder_length")
        self.decoder_length = tf.placeholder(tf.int32, [None], name="decoder_length")
        # ECM placeholder
        # choice_qs: per-token supervision for choosing an emotion word;
        # emo_cat: one emotion category id per batch element.
        self.choice_qs = tf.placeholder(tf.float32, [None, None], name="choice")
        self.emo_cat = tf.placeholder(tf.int32, [None], name="emotion_category")
        self.max_target_sequence_length = tf.reduce_max(
            self.decoder_length, name='max_target_len')

    with tf.variable_scope('seq2seq_embedding'):
        self.embedding = self.init_embedding(self.vocab_size, self.embedding_size)
        # create emotion category embeddings
        emo_initializer = tf.contrib.layers.xavier_initializer()
        emo_cat_embeddings = tf.get_variable(
            "emo_cat_embeddings",
            [self.num_emotion, self.emo_cat_emb_size],
            initializer=emo_initializer,
            dtype=tf.float32)
        self.emo_internal_memory_embedding = tf.get_variable(
            "emo_internal_memory_embedding",
            [self.num_emotion, self.emo_internal_memory_units],
            initializer=emo_initializer,
            dtype=tf.float32)
        self.emo_cat_embs = tf.nn.embedding_lookup(emo_cat_embeddings,
                                                   self.emo_cat)

    with tf.variable_scope('seq2seq_encoder'):
        encoder_outputs, encoder_states = build_encoder(
            self.embedding, self.encoder_inputs, self.encoder_length,
            self.enc_num_layers, self.enc_num_units, self.enc_cell_type,
            bidir=self.enc_bidir)

    with tf.variable_scope('seq2seq_decoder'):
        encoder_length = self.encoder_length
        emo_cat = self.emo_cat
        emo_cat_embs = self.emo_cat_embs
        if self.beam_search:
            print("use beamsearch decoding..")
            # Replicate encoder memory AND emotion inputs per beam.
            encoder_outputs = tile_batch(encoder_outputs, multiplier=self.beam_size)
            encoder_states = tile_batch(encoder_states, multiplier=self.beam_size)
            encoder_length = tile_batch(encoder_length, multiplier=self.beam_size)
            emo_cat = tile_batch(emo_cat, multiplier=self.beam_size)
            emo_cat_embs = tile_batch(emo_cat_embs, multiplier=self.beam_size)

        attention_mechanism = BahdanauAttention(
            num_units=self.attn_num_units,
            memory=encoder_outputs,
            memory_sequence_length=encoder_length)
        decoder_cell = create_rnn_cell(self.dec_num_layers,
                                       self.dec_num_units,
                                       self.dec_cell_type)
        # Gates over the internal emotion memory used by ECMWrapper.
        self.read_g = tf.layers.Dense(self.emo_internal_memory_units,
                                      use_bias=False,
                                      name="internal_read_gate")
        self.write_g = tf.layers.Dense(self.emo_internal_memory_units,
                                       use_bias=False,
                                       name="internal_write_gate")
        decoder_cell = ECMWrapper(
            cell=decoder_cell,
            attention_mechanism=attention_mechanism,
            emo_cat_embs=emo_cat_embs,  # emotion category embedding
            emo_cat=emo_cat,  # emotion category
            emo_internal_memory_units=self.
            emo_internal_memory_units,  # emotion memory size
            emo_internal_memory_embedding=self.
            emo_internal_memory_embedding,  # num of emotions
            read_gate=self.read_g,
            write_gate=self.write_g,
            attention_layer_size=self.dec_num_units,
            name='ECMWrapper')

        batch_size = self.batch_size if not self.beam_search else self.batch_size * self.beam_size
        decoder_initial_state = decoder_cell.zero_state(
            batch_size=batch_size,
            dtype=tf.float32).clone(cell_state=encoder_states)

        output_layer = tf.layers.Dense(
            self.vocab_size, use_bias=False,
            name='output_projection')  # generic-vocabulary projection
        # ECM external memory module
        emo_output_layer = tf.layers.Dense(
            self.vocab_size, use_bias=False,
            name="emo_output_projection")  # emotion-vocabulary projection
        emo_choice_layer = tf.layers.Dense(
            1, use_bias=False,
            name="emo_choice_alpha")  # projection for emotion-word choice probability

        if self.mode == 'train':
            decoder_inputs_embedded = tf.nn.embedding_lookup(
                self.embedding, self.decoder_inputs)
            # TrainingHelper feeds the ground-truth decoder input at each
            # timestep (teacher forcing) instead of the previous output.
            training_helper = TrainingHelper(
                inputs=decoder_inputs_embedded,
                sequence_length=self.decoder_length,
                name='training_helper')
            training_decoder = BasicDecoder(
                cell=decoder_cell,
                helper=training_helper,
                initial_state=decoder_initial_state)
            self.decoder_outputs, self.final_state, self.final_sequence_length = dynamic_decode(
                decoder=training_decoder,
                impute_finished=True,
                maximum_iterations=self.max_target_sequence_length)
            self.decoder_logits_train = tf.identity(
                self.decoder_outputs.rnn_output)
            with tf.variable_scope('decoder'):
                self.generic_logits = output_layer(
                    self.decoder_logits_train)  # logits over the generic vocabulary
                self.emo_ext_logits = emo_output_layer(
                    self.decoder_logits_train)  # logits over the emotion vocabulary
                self.alphas = tf.nn.sigmoid(
                    emo_choice_layer(
                        self.decoder_logits_train))  # probability of choosing an emotion word
                self.int_M_emo = self.final_state.internal_memory  # final state of the internal memory
            # Mixture of the two softmaxes, weighted by alpha.
            g_probs = tf.nn.softmax(
                self.generic_logits) * (1 - self.alphas)
            e_probs = tf.nn.softmax(self.emo_ext_logits) * self.alphas
            train_log_probs = tf.log(g_probs + e_probs)
            # compute losses
            self.alphas = tf.squeeze(self.alphas, axis=-1)
            # Each branch's cross entropy is penalized by -log of its own
            # selection probability (1-alpha for generic, alpha for emotion).
            self.g_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.generic_logits,
                labels=self.decoder_targets) - tf.log(1 - self.alphas)
            self.e_losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.emo_ext_logits,
                labels=self.decoder_targets) - tf.log(self.alphas)
            # choice_qs selects which branch's loss applies per token.
            losses = self.g_losses * (
                1 - self.choice_qs) + self.e_losses * self.choice_qs
            # alpha and internal memory regularizations
            self.alpha_reg = tf.reduce_mean(self.choice_qs *
                                            -tf.log(self.alphas))
            # Encourages the internal memory to be fully written (decay to
            # zero) by the end of decoding; 1e-7 keeps the norm finite.
            self.int_mem_reg = tf.reduce_mean(
                tf.norm(self.int_M_emo + 1e-7, axis=1))
            losses = tf.boolean_mask(losses, self.decoder_targets_masks)
            self.loss = tf.reduce_mean(
                losses) + self.alpha_reg + self.int_mem_reg
            # prepare for perplexity computations
            CE = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=train_log_probs, labels=self.decoder_targets)
            CE = tf.boolean_mask(CE, self.decoder_targets_masks)
            self.CE = tf.reduce_mean(CE)
            optimizer = tf.train.AdamOptimizer(self.learning_rate)
            trainable_params = tf.trainable_variables()
            gradients = tf.gradients(self.loss, trainable_params)
            clip_gradients, _ = tf.clip_by_global_norm(
                gradients, self.max_gradient_norm)
            self.train_op = optimizer.apply_gradients(
                zip(clip_gradients, trainable_params))
        elif self.mode == 'infer':
            start_tokens = tf.ones([
                self.batch_size,
            ], tf.int32) * SOS_ID
            end_token = EOS_ID
            # ECM-specific beam search: carries the emotion projections so
            # the external-memory mixture is applied per decode step.
            inference_decoder = ECMBeamSearchDecoder(
                cell=decoder_cell,
                embedding=self.embedding,
                start_tokens=start_tokens,
                end_token=end_token,
                initial_state=decoder_initial_state,
                beam_width=self.beam_size,
                output_layer=output_layer,
                emo_output_layer=emo_output_layer,
                emo_choice_layer=emo_choice_layer)
            decoder_outputs, _, _ = dynamic_decode(
                decoder=inference_decoder,
                maximum_iterations=self.infer_max_iter)
            infer_outputs = decoder_outputs.predicted_ids  # [batch_size, decoder_targets_length, beam_size]
            self.infer_outputs = tf.transpose(
                infer_outputs, [0, 2, 1], name='infer_outputs'
            )  # [batch_size, beam_size, decoder_targets_length]

    self.saver = tf.train.Saver(tf.global_variables(),
                                max_to_keep=self.max_to_keep)
from encoder import build_encoder
from decoder import build_decoder


def _preprocess(images, side):
    """Reshape grayscale images to NHWC with one channel and scale to [0, 1]."""
    return np.reshape(images, [-1, side, side, 1]).astype('float32') / 255


# Labels are unused: the autoencoder reconstructs its own input.
(X_train, _), (X_test, _) = mnist.load_data()
image_size = X_train.shape[1]
X_train = _preprocess(X_train, image_size)
X_test = _preprocess(X_test, image_size)

# Hyper-parameters.
latent_dim = 16
batch_size = 128
kernel_size = 3
layer_filters = [32, 64]

# Encoder produces the latent code; decoder reconstructs from it.
# `shape` is the pre-flatten feature-map shape the decoder needs.
inputs, encoder, shape = build_encoder(image_size, latent_dim,
                                       layer_filters, kernel_size)
decoder = build_decoder(shape, latent_dim, layer_filters, kernel_size)

# Stack decoder on encoder and train end-to-end on pixel-wise MSE.
autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder')
autoencoder.summary()
autoencoder.compile(loss='mse', optimizer='adam')
autoencoder.fit(X_train, X_train,
                validation_data=(X_test, X_test),
                epochs=30,
                batch_size=batch_size)