def load(self):
    """Restore model variables from the latest checkpoint in ``self.checkpoint_dir``.

    If ``self.name`` is set, only variables whose name contains it are
    restored (and tracked by the saver); otherwise all global variables are.

    Returns:
        (True, counter): on success, where ``counter`` is the trailing step
            number parsed from the checkpoint filename.
        (False, 0): when no checkpoint exists; in that case the current model
            parameters are written out via ``self.save_params()``.
    """
    import re
    print(" [*] Reading checkpoints from {}".format(self.checkpoint_dir))

    if self.name is not None:
        print("Name is", self.name)
        # Restrict the saver to variables belonging to this named sub-model.
        variables_to_load = [v for v in tf.global_variables() if self.name in v.name]
        print("Loading and saving variables ... ")
        show_variables(variables_to_load)
        self.saver = tf.train.Saver(variables_to_load)
    else:
        print("Loading and saving variables ... ")
        show_variables(tf.global_variables())
        self.saver = tf.train.Saver()

    ckpt = tf.train.get_checkpoint_state(self.checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
        # Rebuild the path from checkpoint_dir so a relocated checkpoint
        # directory still resolves correctly.
        ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
        self.saver.restore(self.sess, os.path.join(self.checkpoint_dir, ckpt_name))
        # Parse the last run of digits in the filename (the global step).
        # Raw string fixes the invalid "\d" escape that warns on Python 3.12+.
        counter = int(re.search(r"(\d+)(?!.*\d)", ckpt_name).group(0))
        print(" [*] Success to read {}".format(ckpt_name))
        return True, counter
    else:
        print(" [*] Failed to find a checkpoint")
        # When no checkpoint is found, save parameters in the folder
        self.save_params()
        return False, 0
def train(self):
    """Main training loop with KL-weight annealing and rotated datasets.

    Restores the KL weight from a checkpointed value when resuming, rotates
    through multiple training/validation dataset handles, runs the optimizer
    each step, and periodically writes training/validation summaries, prints
    losses, decays the KL weight, and runs validation checks.
    """
    # Define summary writer for saving log files (for training and validation)
    self.writer = tf.summary.FileWriter(os.path.join(self.log_dir, 'training/'), graph=tf.get_default_graph())
    self.vwriter = tf.summary.FileWriter(os.path.join(self.log_dir, 'validation/'), graph=tf.get_default_graph())

    # Show list of all variables and total parameter count
    show_variables()

    # Recover kl_wt when restoring from checkpoint
    # Get handles for training and validation datasets
    # NOTE: all three fetches share a single sess.run() call.
    self.training_handles, self.validation_handles, kl_wt = self.sess.run([[d.string_handle() for d in self.training_datasets],
                                                                           [d.string_handle() for d in self.validation_datasets],
                                                                           self.kl_wt_value])

    # A nonzero stored kl_wt is treated as evidence of a restored checkpoint;
    # a fresh run starts with the variable at 0.0 — presumably its initializer
    # (verify against where kl_wt_value is defined).
    if not (kl_wt == 0.0):
        self.kl_weight = kl_wt
        print("\n[ Restoring Variables ]\n")
    else:
        print("\n[ Initializing Variables ]\n")

    # Start on the first dataset in the rotation.
    self.current_handle = 0
    self.training_handle = self.training_handles[self.current_handle]
    self.validation_handle = self.validation_handles[self.current_handle]

    # Iterate through training steps
    while not self.sess.should_stop():

        # Update global step
        step = tf.train.global_step(self.sess, self.global_step)

        # Switch to next rotated dataset after each epoch
        #if step % self.prefetch_count == 0:
        if step % self.dataset_step == 0:
            # Advance cyclically through the available dataset handles.
            self.current_handle = int(np.mod(self.current_handle + 1, len(self.training_handles)))
            self.training_handle = self.training_handles[self.current_handle]
            self.validation_handle = self.validation_handles[self.current_handle]

        # Break if early stopping hook requests stop after sess.run()
        if self.sess.should_stop():
            break

        # Specify feed dictionary
        # z is fed as zeros here — presumably the latent sample is drawn
        # inside the graph during training; confirm against model definition.
        fd = {self.dataset_handle: self.training_handle,
              self.training: True,
              self.z: np.zeros([self.batch_size, self.z_dim]),
              self.kl_wt: self.kl_weight}

        # Save summaries, display progress, and update model
        if (step % self.summary_step == 0) and (step % self.display_step == 0):
            summary, kl_loss, i_loss, b_loss, loss, _ = self.sess.run([self.merged_summaries, self.kl_loss,
                                                                       self.interior_loss, self.boundary_loss,
                                                                       self.loss, self.optim], feed_dict=fd)
            if self.use_kl:
                print("Step %d: %.7f [kl_loss] %.7f [i_loss] %.7f [b_loss] %.7f [loss] " %(step,kl_loss,i_loss,b_loss,loss))
            else:
                print("Step %d: %.7f [i_loss] %.7f [b_loss] %.7f [loss] " %(step,i_loss,b_loss,loss))
            self.writer.add_summary(summary, step); self.writer.flush()
            # Abort training outright on a NaN loss rather than continuing
            # to write useless checkpoints/summaries.
            if np.isnan(loss):
                raise ValueError("NaN loss value encountered at global step %d." %(step))

        # Save summaries and update model
        elif step % self.summary_step == 0:
            summary, _ = self.sess.run([self.merged_summaries, self.optim], feed_dict=fd)
            self.writer.add_summary(summary, step); self.writer.flush()

        # Display progress and update model
        elif step % self.display_step == 0:
            kl_loss, i_loss, b_loss, loss, _ = self.sess.run([self.kl_loss, self.interior_loss,
                                                              self.boundary_loss, self.loss,
                                                              self.optim], feed_dict=fd)
            if self.use_kl:
                print("Step %d: %.7f [kl_loss] %.7f [i_loss] %.7f [b_loss] %.7f [loss] " %(step,kl_loss,i_loss,b_loss,loss))
            else:
                print("Step %d: %.7f [i_loss] %.7f [b_loss] %.7f [loss] " %(step,i_loss,b_loss,loss))

        # Update model
        else:
            self.sess.run([self.optim], feed_dict=fd)

        # Break if early stopping hook requests stop after sess.run()
        if self.sess.should_stop():
            break

        # Plot predictions
        #if step % self.plot_step == 0:
        #    #self.plot_comparisons(step)
        #    self.plot_data(step, handle=self.training_handle)

        # Break if early stopping hook requests stop after sess.run()
        if self.sess.should_stop():
            break

        if step % self.summary_step == 0:
            # KL decay only kicks in after kl_start_step; before that only a
            # plain validation summary is written.
            if step >= self.kl_start_step:
                # Save validation summaries and update kl_weight to avoid underfitting
                fd = {self.dataset_handle: self.validation_handle,
                      self.z: np.zeros([self.batch_size, self.z_dim]),
                      self.training: False,
                      self.kl_wt: self.kl_weight}
                vsummary, vkl_l, vi_l = self.sess.run([self.merged_summaries, self.kl_loss, self.interior_loss], feed_dict=fd)
                self.vwriter.add_summary(vsummary, step); self.vwriter.flush()
                if self.use_kl_decay:
                    # Monotone non-increasing KL weight, scaled down when the
                    # KL term dominates the interior loss (the 0.5 / 0.4
                    # factors appear to be tuned constants — TODO confirm).
                    self.kl_weight = np.min([self.kl_weight, 0.5*self.kl_weight*(0.4*vi_l/vkl_l)])
                    # Persist the updated weight into the graph variable so it
                    # survives checkpointing (see the restore logic above).
                    self.sess.run(self.assign_kl_wt_value, feed_dict={self.kl_wt: self.kl_weight})
            else:
                # Save validation summaries and update kl_weight to avoid underfitting
                fd = {self.dataset_handle: self.validation_handle,
                      self.z: np.zeros([self.batch_size, self.z_dim]),
                      self.training: False,
                      self.kl_wt: self.kl_weight}
                vsummary = self.sess.run(self.merged_summaries, feed_dict=fd)
                self.vwriter.add_summary(vsummary, step); self.vwriter.flush()

        if self.validation_checks:
            if step % self.evaluation_step == 0:
                self.evaluate_validation(step)
def train(self):
    """Run the supervised training loop.

    Each step runs the optimizer; on summary steps a training summary (and a
    validation summary) is written, on display steps the loss is printed, and
    on plot steps predictions are plotted. Stops when the monitored session's
    early-stopping hook requests it.
    """
    # One summary writer each for the training and validation event files.
    graph = tf.get_default_graph()
    self.writer = tf.summary.FileWriter(os.path.join(self.log_dir, 'training/'), graph=graph)
    self.vwriter = tf.summary.FileWriter(os.path.join(self.log_dir, 'validation/'), graph=graph)

    # List every variable together with the total parameter count.
    show_variables()
    print("\n[ Initializing Variables ]\n")

    # Resolve the feedable-iterator string handles for both datasets at once.
    handle_ops = [self.dataset.string_handle(), self.vdataset.string_handle()]
    self.training_handle, self.validation_handle = self.sess.run(handle_ops)

    while not self.sess.should_stop():
        step = tf.train.global_step(self.sess, self.global_step)

        # An early-stopping hook can request a stop after any sess.run().
        if self.sess.should_stop():
            break

        feed = {self.dataset_handle: self.training_handle, self.training: True}
        write_summary = (step % self.summary_step == 0)
        show_progress = (step % self.display_step == 0)

        if write_summary and show_progress:
            # Summaries, loss readout, and optimizer step in one run call.
            summary, loss, _ = self.sess.run([self.merged_summaries, self.loss, self.optim], feed_dict=feed)
            print("Step %d: %.10f [loss] " % (step, loss))
            self.writer.add_summary(summary, step)
            self.writer.flush()
        elif write_summary:
            summary, _ = self.sess.run([self.merged_summaries, self.optim], feed_dict=feed)
            self.writer.add_summary(summary, step)
            self.writer.flush()
        elif show_progress:
            loss, _ = self.sess.run([self.loss, self.optim], feed_dict=feed)
            print("Step %d: %.10f [loss] " % (step, loss))
        else:
            self.sess.run([self.optim], feed_dict=feed)

        if self.sess.should_stop():
            break

        # Periodically plot model predictions.
        if step % self.plot_step == 0:
            self.plot_predictions()

        # On summary steps, also log a validation summary (training=False).
        if step % self.summary_step == 0:
            vfeed = {self.dataset_handle: self.validation_handle, self.training: False}
            vsummary = self.sess.run(self.merged_summaries, feed_dict=vfeed)
            self.vwriter.add_summary(vsummary, step)
            self.vwriter.flush()
def train(self):
    """Run the GAN training loop.

    Every step draws a fresh latent batch, then updates the discriminator and
    generator together. Summaries (training and validation) are written on
    summary steps, losses printed on display steps, and predictions plotted
    on plot steps.
    """
    # Separate event-file writers for training and validation logs.
    graph = tf.get_default_graph()
    self.writer = tf.summary.FileWriter(os.path.join(self.log_dir, 'training/'), graph=graph)
    self.vwriter = tf.summary.FileWriter(os.path.join(self.log_dir, 'validation/'), graph=graph)

    # Print all variables and the total parameter count.
    show_variables()
    print("\n[ Initializing Variables ]\n")

    # Fetch the feedable-iterator handles for both datasets in one call.
    self.training_handle, self.validation_handle = self.sess.run(
        [self.dataset.string_handle(), self.vdataset.string_handle()])

    while not self.sess.should_stop():
        step = tf.train.global_step(self.sess, self.global_step)

        # Fresh random generator inputs for this step.
        z_batch = self.sample_z(self.batch_size)
        feed = {self.dataset_handle: self.training_handle, self.z: z_batch, self.training: True}

        log_summary = (step % self.summary_step == 0)
        log_progress = (step % self.display_step == 0)

        if log_summary and log_progress:
            # Fetch summaries and both losses while applying both updates.
            summary, d_loss, g_loss, _, __ = self.sess.run([
                self.merged_summaries, self.d_loss, self.g_loss,
                self.d_optim, self.g_optim
            ], feed_dict=feed)
            print("Step %d: %.10f [d_loss] %.10f [g_loss] " % (step, d_loss, g_loss))
            self.writer.add_summary(summary, step)
            self.writer.flush()
        elif log_summary:
            summary, _, __ = self.sess.run(
                [self.merged_summaries, self.d_optim, self.g_optim], feed_dict=feed)
            self.writer.add_summary(summary, step)
            self.writer.flush()
        elif log_progress:
            d_loss, g_loss, _, __ = self.sess.run(
                [self.d_loss, self.g_loss, self.d_optim, self.g_optim], feed_dict=feed)
            print("Step %d: %.10f [d_loss] %.10f [g_loss] " % (step, d_loss, g_loss))
        else:
            self.sess.run([self.d_optim, self.g_optim], feed_dict=feed)

        # Periodically plot predictions for this step.
        if step % self.plot_step == 0:
            self.plot_predictions(step)

        # On summary steps, also write a validation summary; the current
        # z_batch is reused with training=False.
        if log_summary:
            vfeed = {self.dataset_handle: self.validation_handle, self.z: z_batch, self.training: False}
            vsummary = self.sess.run(self.merged_summaries, feed_dict=vfeed)
            self.vwriter.add_summary(vsummary, step)
            self.vwriter.flush()