def __init__(self, generator, sample_id):
    self.global_step = generator.global_step
    self.training = generator.training
    self.batch_input = generator.batch_input
    self.vocab_size = generator.vocab_size
    self.hparams = generator.hparams
    self.time_major = self.hparams.time_major
    self.sample_id = tf.transpose(sample_id)
    self.reverse_vocab_table = generator.reverse_vocab_table

    # create two copies of discriminator, one for real pairs and one for fake pairs
    # they share the same underlying variables
    with tf.name_scope("real_discriminator"):
        with tf.variable_scope("discriminator"):
            self.predict_real = self._build_disc(
                self.batch_input.original_source, self.batch_input.original_target)

    with tf.name_scope("fake_discriminator"):
        with tf.variable_scope("discriminator", reuse=True):
            self.predict_fake = self._build_disc(
                self.batch_input.original_source, self.sample_id)

    with tf.name_scope("discriminator_loss"):
        zeros = tf.zeros(tf.shape(self.predict_real[:, 0]), dtype=tf.int32)
        loss_real = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.predict_real, labels=zeros))
        self.accuracy_real = tf.metrics.accuracy(
            zeros, tf.argmax(self.predict_real, 1))

        ones = tf.ones(tf.shape(self.predict_fake[:, 0]), dtype=tf.int32)
        loss_fake = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.predict_fake, labels=ones))
        self.accuracy_fake = tf.metrics.accuracy(
            ones, tf.argmax(self.predict_fake, 1))

        self.loss = loss_real + loss_fake
        self.gan_loss = -tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.predict_fake, labels=ones))

    with tf.name_scope("discriminator_train"):
        discrim_tvars = [
            var for var in tf.trainable_variables()
            if var.name.startswith("discriminator")
        ]
        discrim_optim = tf.train.AdamOptimizer()
        gradients = tf.gradients(self.loss, discrim_tvars)
        clipped_gradients, _ = model_helper.gradient_clip(
            gradients, max_gradient_norm=self.hparams.max_gradient_norm)
        self.update = discrim_optim.apply_gradients(
            zip(clipped_gradients, discrim_tvars))
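# Both the discriminator above and the generators below call
# model_helper.gradient_clip(), whose implementation is not shown in these
# snippets. A minimal sketch, assuming it follows the usual TF 1.x NMT-style
# global-norm clipping and returns (clipped_gradients, summary_ops), which is
# consistent with how the callers unpack its result:
import tensorflow as tf

def gradient_clip(gradients, max_gradient_norm):
    """Clip gradients by global norm; also return summaries of the norms."""
    clipped_gradients, gradient_norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
    gradient_norm_summary = [
        tf.summary.scalar("grad_norm", gradient_norm),
        tf.summary.scalar("clipped_gradient_norm", tf.global_norm(clipped_gradients)),
    ]
    return clipped_gradients, gradient_norm_summary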
def __init__(self, training, tokenized_data, batch_input, scope=None):
    """
    Create the model.

    Args:
        training: A boolean value to indicate whether this model will be used for training.
        tokenized_data: The data object containing all information required for the model.
        batch_input: The batched input (source/target tensors and their sequence lengths).
        scope: Scope of the model.
    """
    self.training = training
    self.batch_input = batch_input
    self.vocab_table = tokenized_data.vocab_table
    self.vocab_size = tokenized_data.vocab_size
    self.reverse_vocab_table = tokenized_data.reverse_vocab_table

    hparams = tokenized_data.hparams
    self.hparams = hparams
    self.num_layers = hparams.num_layers
    self.time_major = hparams.time_major

    # Initializer
    initializer = model_helper.get_initializer(hparams.init_op, hparams.random_seed,
                                               hparams.init_weight)
    tf.get_variable_scope().set_initializer(initializer)

    # Embeddings
    self.embedding = model_helper.create_embbeding(vocab_size=self.vocab_size,
                                                   embed_size=hparams.num_units,
                                                   scope=scope)

    # This batch_size may vary from batch to batch due to bucketing and/or reaching
    # the end of the training set. Treat it as the size of the current batch.
    self.batch_size = tf.size(self.batch_input.source_sequence_length)

    # Projection
    with tf.variable_scope(scope or "build_network"):
        with tf.variable_scope("decoder/output_projection"):
            self.output_layer = layers_core.Dense(self.vocab_size, use_bias=False,
                                                  name="output_projection")

    # Training or inference graph
    print("# Building graph for the model ...")
    res = self.build_graph(hparams, scope=scope)

    if training:
        self.train_loss = res[1]
        self.word_count = tf.reduce_sum(self.batch_input.source_sequence_length) + \
            tf.reduce_sum(self.batch_input.target_sequence_length)
        # Count the number of predicted words for computing perplexity.
        self.predict_count = tf.reduce_sum(self.batch_input.target_sequence_length)
    else:
        self.infer_logits, _, self.final_context_state, self.sample_id = res
        self.sample_words = self.reverse_vocab_table.lookup(tf.to_int64(self.sample_id))

    self.global_step = tf.Variable(0, trainable=False)
    params = tf.trainable_variables()

    # Gradients update operation for training the model.
    if training:
        self.learning_rate = tf.placeholder(tf.float32, shape=[], name='learning_rate')
        opt = tf.train.AdamOptimizer(self.learning_rate)

        gradients = tf.gradients(self.train_loss, params)
        clipped_gradients, gradient_norm_summary = model_helper.gradient_clip(
            gradients, max_gradient_norm=hparams.max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, params),
                                          global_step=self.global_step)

        # Summary
        self.train_summary = tf.summary.merge([
            tf.summary.scalar("learning_rate", self.learning_rate),
            tf.summary.scalar("train_loss", self.train_loss),
        ] + gradient_norm_summary)
    else:
        self.infer_summary = tf.no_op()

    # Saver
    self.saver = tf.train.Saver(tf.global_variables())

    # Print trainable variables
    if training:
        print("# Trainable variables:")
        for param in params:
            print(" {}, {}, {}".format(param.name, str(param.get_shape()),
                                       param.op.device))
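# predict_count above exists so that perplexity can be reported as
# exp(cross-entropy per predicted target word). A minimal sketch of that
# calculation (an illustration, not code from this project; how the losses and
# counts are accumulated across steps depends on the surrounding training loop):
import math

def compute_perplexity(accumulated_loss, accumulated_predict_count):
    """exp(total cross-entropy / number of predicted words), capped to avoid overflow."""
    return math.exp(min(accumulated_loss / accumulated_predict_count, 100.0))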
def __init__(self, training, tokenized_data, batch_input, scope=None):
    """
    Create the model.

    Args:
        training: A boolean value to indicate whether this model will be used for training.
        tokenized_data: The data object containing all information required for the model.
        batch_input: The batched input (source/target tensors and their sequence lengths).
        scope: Scope of the model.
    """
    self.training = training
    self.batch_input = batch_input
    self.vocab_list = tokenized_data.vocab_list
    self.vocab_table = tokenized_data.vocab_table
    self.vocab_size = tokenized_data.vocab_size
    self.reverse_vocab_table = tokenized_data.reverse_vocab_table

    hparams = tokenized_data.hparams
    self.hparams = hparams
    self.num_layers = hparams.num_layers
    self.time_major = hparams.time_major

    # Initializer
    initializer = model_helper.get_initializer(
        hparams.init_op, hparams.random_seed, hparams.init_weight)
    tf.get_variable_scope().set_initializer(initializer)

    # Embeddings
    self.embedding = model_helper.create_embedding(vocab_size=self.vocab_size,
                                                   embed_size=hparams.num_units,
                                                   trainable=hparams.train_embeddings,
                                                   scope=scope)
    if training and hparams.pretrained_embeddings:
        from settings import PROJECT_ROOT
        pretrained_embeddings_file = os.path.join(PROJECT_ROOT, 'Data', 'Corpus',
                                                  hparams.pretrained_embeddings)
        self.pretrained = model_helper.populate_embedding(self.embedding,
                                                          self.vocab_list,
                                                          pretrained_embeddings_file)
    else:
        self.pretrained = None

    # This batch_size may vary from batch to batch due to bucketing and/or reaching
    # the end of the training set. Treat it as the size of the current batch.
    self.batch_size = tf.size(self.batch_input.source_sequence_length)

    # Projection
    with tf.variable_scope(scope or "build_network"):
        with tf.variable_scope("decoder/output_projection"):
            self.output_layer = layers_core.Dense(
                self.vocab_size, use_bias=False, name="output_projection")

    self.global_step = tf.Variable(0, trainable=False)

    # Training or inference graph
    print("# Building graph for the model ...")
    res = self.build_graph(hparams, scope=scope)

    if training:
        self.train_loss = res[1]
        self.word_count = tf.reduce_sum(self.batch_input.source_sequence_length) + \
            tf.reduce_sum(self.batch_input.target_sequence_length)
        # Count the number of predicted words for computing perplexity.
        self.predict_count = tf.reduce_sum(self.batch_input.target_sequence_length)
        self.sample_id = res[-2]
        self.greedy_sample_id = res[-1]
    else:
        self.infer_logits, _, self.final_context_state, self.sample_id, self.greedy_sample_id = res
        self.sample_words = self.reverse_vocab_table.lookup(tf.to_int64(self.sample_id))

    gen_tvars = [var for var in tf.trainable_variables()
                 if var.name.startswith("dynamic_seq2seq")]

    # Gradients update operation for training the model.
    if training:
        # with tf.control_dependencies([]):
        self.learning_rate = tf.placeholder(tf.float32, shape=[], name='learning_rate')
        opt = tf.train.AdamOptimizer(self.learning_rate)

        # depends = [self.disc.update] if self.disc else []
        # with tf.control_dependencies(depends):
        gradients = tf.gradients(self.train_loss, gen_tvars)
        clipped_gradients, _ = model_helper.gradient_clip(
            gradients, max_gradient_norm=hparams.max_gradient_norm)

        self.train_discriminator = tf.placeholder(tf.bool, shape=[],
                                                  name='train_discriminator')
        update_gen = opt.apply_gradients(zip(clipped_gradients, gen_tvars),
                                         global_step=self.global_step)
        # update_gen = tf.cond(tf.logical_not(self.train_discriminator),
        #                      lambda: update_gen, lambda: tf.no_op())

        # TODO: review this. The intent is to update the discriminator only when
        # its accuracy has dropped significantly.
        update_disc = tf.cond(self.train_discriminator,
                              lambda: self.disc.update,
                              lambda: tf.no_op())
        self.update = tf.group(self.disc.loss,
                               self.disc.accuracy_real[1],
                               self.disc.accuracy_fake[1],
                               update_disc,
                               self.train_loss,
                               update_gen)

        scalars = [
            tf.summary.scalar("learning_rate", self.learning_rate),
            tf.summary.scalar("train_loss", self.train_loss),
        ]
        if self.disc:
            scalars += self.disc.metrics()

        # Summary
        self.train_summary = tf.summary.merge(scalars)
    else:
        self.infer_summary = tf.no_op()

    # Saver
    if self.hparams.only_restore_gan:
        variables = [v for v in tf.global_variables()
                     if not v.name.startswith('discriminator')]
    else:
        variables = tf.global_variables()
    self.saver = tf.train.Saver(variables, max_to_keep=2)

    # Print trainable variables
    if training:
        print("# Trainable variables:")
        for param in tf.trainable_variables():
            print(" {}, {}, {}".format(param.name, str(param.get_shape()),
                                       param.op.device))
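# A hedged usage sketch (not part of the original code) of how the two
# placeholders above might be fed for one GAN training step. The names `sess`,
# `model`, and the 0.75 threshold are assumptions for illustration: the
# discriminator is only updated when its running accuracy on fake pairs has
# dropped below the threshold.
def run_train_step(sess, model, learning_rate, last_fake_accuracy):
    train_disc = last_fake_accuracy < 0.75  # hypothetical gating threshold
    _, step_loss, fake_acc = sess.run(
        [model.update, model.train_loss, model.disc.accuracy_fake[0]],
        feed_dict={model.learning_rate: learning_rate,
                   model.train_discriminator: train_disc})
    return step_loss, fake_acc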
def __init__(self, training, tokenized_data, batch_input, scope=None):
    """
    Create the model.

    Args:
        training: A boolean value to indicate whether this model will be used for training.
        tokenized_data: The data object containing all information required for the model.
        batch_input: The batched input (source/target tensors and their sequence lengths).
        scope: Scope of the model.
    """
    self.training = training
    self.batch_input = batch_input
    self.vocab_table = tokenized_data.vocab_table
    self.vocab_size = tokenized_data.vocab_size
    self.reverse_vocab_table = tokenized_data.reverse_vocab_table

    hparams = tokenized_data.hparams
    self.hparams = hparams
    self.num_layers = hparams.num_layers
    self.time_major = hparams.time_major

    # Initializer
    initializer = model_helper.get_initializer(hparams.init_op, hparams.random_seed,
                                               hparams.init_weight)
    tf.get_variable_scope().set_initializer(initializer)

    # Embeddings
    self.embedding = model_helper.create_embbeding(vocab_size=self.vocab_size,
                                                   embed_size=hparams.num_units,
                                                   scope=scope)

    # This batch_size may vary from batch to batch due to bucketing and/or reaching
    # the end of the training set. Treat it as the size of the current batch.
    self.batch_size = tf.size(self.batch_input.source_sequence_length)

    # Projection
    with tf.variable_scope(scope or "build_network"):
        with tf.variable_scope("decoder/output_projection"):
            self.output_layer = layers_core.Dense(self.vocab_size, use_bias=False,
                                                  name="output_projection")

    # Training or inference graph
    print('\n\n{} Building graph for the model ...{}\n'.format(
        colorama.Fore.GREEN, colorama.Fore.RESET))
    print("***************************************")
    res = self.build_graph(hparams, scope=scope)

    if training:
        self.train_loss = res[1]
        self.word_count = tf.reduce_sum(self.batch_input.source_sequence_length) + \
            tf.reduce_sum(self.batch_input.target_sequence_length)
        # Count the number of predicted words for computing perplexity.
        self.predict_count = tf.reduce_sum(self.batch_input.target_sequence_length)
    else:
        self.infer_logits, _, self.final_context_state, self.sample_id = res
        self.sample_words = self.reverse_vocab_table.lookup(tf.to_int64(self.sample_id))

    self.global_step = tf.Variable(0, trainable=False)
    params = tf.trainable_variables()

    # Gradients update operation for training the model.
    if training:
        self.learning_rate = tf.placeholder(tf.float32, shape=[], name='learning_rate')
        opt = tf.train.AdamOptimizer(self.learning_rate)

        # Added by Nuruzzaman on 18/05/2018.
        # Run the training for at least one epoch and note down the time, then set
        # this to False (or simply remove it) and run for another epoch to see
        # whether the per-epoch times differ significantly; the difference can be
        # surprisingly large.
        colocate_gradients_with_ops = True
        gradients = tf.gradients(self.train_loss, params,
                                 colocate_gradients_with_ops=colocate_gradients_with_ops)

        clipped_gradients, gradient_norm_summary = model_helper.gradient_clip(
            gradients, max_gradient_norm=hparams.max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, params),
                                          global_step=self.global_step)

        # Summary
        self.train_summary = tf.summary.merge([
            tf.summary.scalar("learning_rate", self.learning_rate),
            tf.summary.scalar("train_loss", self.train_loss),
        ] + gradient_norm_summary)
    else:
        self.infer_summary = tf.no_op()

    # Saver
    self.saver = tf.train.Saver(tf.global_variables())
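# model_helper.get_initializer() is shared project code that is not part of
# these snippets. A hedged sketch of the TF 1.x NMT-style behavior it is
# assumed to follow, based on how it is called above with
# (init_op, random_seed, init_weight):
import tensorflow as tf

def get_initializer(init_op, seed=None, init_weight=None):
    """Return a variable initializer selected by the init_op hyperparameter."""
    if init_op == "uniform":
        assert init_weight
        return tf.random_uniform_initializer(-init_weight, init_weight, seed=seed)
    elif init_op == "glorot_normal":
        return tf.glorot_normal_initializer(seed=seed)
    elif init_op == "glorot_uniform":
        return tf.glorot_uniform_initializer(seed=seed)
    else:
        raise ValueError("Unknown init_op %s" % init_op)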