def create_generator_loss(
        self, x: tf.Tensor, y: tf.Tensor, generator: tf.Tensor,
        generator_discriminator: tf.Tensor, apply_summary: bool = True,
        use_gpu: bool = True, data_format: str = "NHWC"
) -> Union[tf.Tensor, Tuple[tf.Tensor, List[tf.Tensor]]]:
    summary_ops = []

    # Adversarial term: the generator tries to make the discriminator
    # classify its output as real (label 1)
    with tf.device(select_device(use_gpu)):
        loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=generator_discriminator,
                labels=tf.ones_like(generator_discriminator)))

    if self._l1_lambda > 0.0:
        _log.info(f"Using l1 loss, lambda={self._l1_lambda}")

        # Optional L1 reconstruction term between ground truth and generated output
        with tf.device(select_device(use_gpu)):
            l1_loss = tf.reduce_mean(tf.abs(tf.subtract(y, generator)))

        with use_cpu():
            if apply_summary:
                summary_ops.append(
                    tf.summary.scalar("generator_loss_l1", l1_loss))
                summary_ops.append(
                    tf.summary.scalar("generator_loss_discriminator", loss))

            loss = loss + tf.constant(self._l1_lambda, dtype=tf.float32) * l1_loss

    if apply_summary:
        summary_ops.append(tf.summary.scalar("generator_loss", loss))
        return loss, summary_ops
    else:
        return loss
def create_generator_loss(
        self, x: tf.Tensor, y: tf.Tensor, generator: tf.Tensor,
        generator_discriminator: tf.Tensor, apply_summary: bool = True,
        use_gpu: bool = True, data_format: str = "NHWC"
) -> Union[tf.Tensor, Tuple[tf.Tensor, List[tf.Tensor]]]:
    with tf.device(select_device(use_gpu)):
        # Generator should have 0 loss
        loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=generator_discriminator,
                labels=tf.zeros_like(generator_discriminator)))

    if apply_summary:
        return loss, [tf.summary.scalar("generator_loss", loss)]
    else:
        return loss
def create_discriminator_loss(
        self, x: tf.Tensor, y: tf.Tensor, generator: tf.Tensor,
        generator_discriminator: tf.Tensor, real_discriminator: tf.Tensor,
        apply_summary: bool = True, use_gpu: bool = True,
        data_format: str = "NHWC"
) -> Union[tf.Tensor, Tuple[tf.Tensor, List[tf.Tensor]]]:
    with use_cpu():
        batch_size = tf.shape(x)[0]
        epsilons = tf.random_uniform((batch_size,), 0.0, 1.0, seed=self._seed)
        gradient_input = tf.multiply(epsilons, y) + tf.multiply(
            tf.ones_like(epsilons) - epsilons, generator)

    with tf.device(select_device(use_gpu)):
        gradient_discriminator, gradient_discriminator_input = self._network_factory.create_discriminator(
            x, gradient_input, reuse=True, return_input_tensor=True,
            use_gpu=use_gpu, data_format=data_format)

        generator_discriminator = tf.reshape(generator_discriminator, shape=[-1])

        # Generated samples should be close to their inverse jaccard index => Loss defined
        # Generator samples have to be first converted into range [0, 1]
        generated_jaccard_index = jaccard_index(
            values=tanh_to_sigmoid(generator), labels=tanh_to_sigmoid(y))
        generated_jaccard_loss = tf.ones_like(generated_jaccard_index) - generated_jaccard_index

        loss_generated = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=generator_discriminator,
                labels=generated_jaccard_loss))

        # Real samples should all be 0 => No loss
        loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=real_discriminator,
                labels=tf.zeros_like(real_discriminator)))

        gradient = tf.gradients(gradient_discriminator, gradient_discriminator_input)
        gradient_norm = tf.norm(gradient)
        gradient_penalty_raw = tf.reduce_mean(
            (gradient_norm - tf.ones_like(gradient_norm)) ** 2)

    with use_cpu():
        gradient_penalty = tf.multiply(
            tf.constant(self._gradient_lambda, dtype=tf.float32),
            gradient_penalty_raw)

        loss = loss_generated + loss_real + gradient_penalty

    if apply_summary:
        summary_operations = [
            tf.summary.scalar("discriminator_loss_real", loss_real),
            tf.summary.scalar("discriminator_loss_generated", loss_generated),
            tf.summary.scalar("discriminator_gradient_penalty", gradient_penalty),
            tf.summary.scalar("discriminator_gradient_norm", gradient_norm),
            tf.summary.scalar("discriminator_loss", loss)
        ]
        return loss, summary_operations
    else:
        return loss
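# The loss above labels generated samples with one minus their Jaccard index
# against the ground truth, so the discriminator is trained to predict how far
# a generated segmentation is from the target rather than a hard fake/real
# label. The helpers `jaccard_index` and `tanh_to_sigmoid` come from this code
# base; the sketch below shows one plausible reading of them (tanh outputs in
# [-1, 1], soft intersection over union) and is purely illustrative.
def _soft_jaccard_label_sketch(generator_output, target):
    import tensorflow as tf

    # Map tanh-activated values from [-1, 1] into [0, 1]
    values = (generator_output + 1.0) / 2.0
    labels = (target + 1.0) / 2.0

    # Soft Jaccard index per sample: intersection / union over all non-batch axes
    axes = list(range(1, values.shape.ndims))
    intersection = tf.reduce_sum(values * labels, axis=axes)
    union = tf.reduce_sum(values + labels - values * labels, axis=axes)
    jaccard = intersection / (union + 1e-7)

    # Label used for the generated samples: 1 - Jaccard index
    return 1.0 - jaccard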
def create_discriminator_loss(
        self, x: tf.Tensor, y: tf.Tensor, generator: tf.Tensor,
        generator_discriminator: tf.Tensor, real_discriminator: tf.Tensor,
        apply_summary: bool = True, use_gpu: bool = True,
        data_format: str = "NHWC"
) -> Union[tf.Tensor, Tuple[tf.Tensor, List[tf.Tensor]]]:
    with use_cpu():
        # Random interpolations between real and generated samples,
        # used as the evaluation points for the gradient penalty
        batch_size = tf.shape(x)[0]
        epsilons = tf.random_uniform((batch_size,), 0.0, 1.0, seed=self._seed)
        gradient_input = tf.multiply(epsilons, y) + tf.multiply(
            tf.ones_like(epsilons) - epsilons, generator)

    with tf.device(select_device(use_gpu)):
        gradient_discriminator, gradient_discriminator_input = self._network_factory.create_discriminator(
            x, gradient_input, reuse=True, return_input_tensor=True,
            use_gpu=use_gpu, data_format=data_format)

        # Generated samples should be classified as fake (label 0)
        loss_generated = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=generator_discriminator,
                labels=tf.zeros_like(generator_discriminator)))

        # Real samples should be classified as real (label 1)
        loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=real_discriminator,
                labels=tf.ones_like(real_discriminator)))

        # Penalize deviations of the discriminator's gradient norm from 1
        # at the interpolated samples
        gradient = tf.gradients(gradient_discriminator, gradient_discriminator_input)
        gradient_norm = tf.norm(gradient)
        gradient_penalty_raw = tf.reduce_mean(
            (gradient_norm - tf.ones_like(gradient_norm)) ** 2)

    with use_cpu():
        gradient_penalty = tf.multiply(
            tf.constant(self._gradient_lambda, dtype=tf.float32),
            gradient_penalty_raw)

        loss = loss_generated + loss_real + gradient_penalty

    if apply_summary:
        summary_operations = [
            tf.summary.scalar("discriminator_loss_real", loss_real),
            tf.summary.scalar("discriminator_loss_generated", loss_generated),
            tf.summary.scalar("discriminator_gradient_penalty", gradient_penalty),
            tf.summary.scalar("discriminator_gradient_norm", gradient_norm),
            tf.summary.scalar("discriminator_loss", loss)
        ]
        return loss, summary_operations
    else:
        return loss
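# Both discriminator losses above add a gradient penalty in the spirit of
# WGAN-GP: the discriminator is evaluated on random interpolations between
# real and generated samples, and deviations of its gradient norm from 1 are
# penalized. The standalone sketch below shows that term in isolation;
# `discriminator_fn`, `real`, `generated` and `gradient_lambda` are
# illustrative placeholders, not part of this code base. Here the per-sample
# epsilons are given shape (batch, 1, 1, 1) so they broadcast over NHWC
# image tensors.
def _gradient_penalty_sketch(discriminator_fn, real, generated, gradient_lambda):
    import tensorflow as tf

    batch_size = tf.shape(real)[0]
    # One interpolation factor per sample, broadcast over height/width/channels
    epsilons = tf.random_uniform([batch_size, 1, 1, 1], 0.0, 1.0)
    interpolated = epsilons * real + (1.0 - epsilons) * generated

    # Gradient of the discriminator output with respect to its input
    discriminator_output = discriminator_fn(interpolated)
    gradients = tf.gradients(discriminator_output, [interpolated])[0]
    gradient_norm = tf.norm(gradients)

    # Penalize the squared distance of the gradient norm from 1
    return gradient_lambda * tf.reduce_mean((gradient_norm - 1.0) ** 2)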
def train(self):
    """
    Trains the model until the maximum number of iterations specified in the
    training options is reached.
    This method requires __enter__ to have been called on this trainer
    beforehand; otherwise no session exists and the call will fail.
    """
    if self._sess is None:
        raise RuntimeError("A running session is required to start training")

    # Start input enqueue threads.
    coord = tf.train.Coordinator()
    _log.info("Starting queue runners...")
    threads = tf.train.start_queue_runners(sess=self._sess, coord=coord)

    save_model_path = os.path.join(self._training_options.summary_directory, "model.ckpt")
    save_image_path = os.path.join(self._training_options.summary_directory, "images/")
    if not os.path.exists(save_image_path):
        os.makedirs(save_image_path)

    with tf.device(select_device(self._training_options.use_gpu)):
        init_ops = (tf.global_variables_initializer(), tf.local_variables_initializer())

    tf.get_default_graph().finalize()

    if self._restored_iteration is None:
        self._sess.run(init_ops)

    iteration = self._sess.run(self._global_step)

    _log.info("Starting training")

    try:
        while not coord.should_stop():
            current_iteration = iteration
            iteration = self._sess.run(self._step_op)

            if current_iteration % self._training_options.save_model_interval == 0:
                self._train_saver.save(self._sess, save_model_path, global_step=self._global_step)
                _log.info(f"Saved model from iteration {iteration}")

            # Run CV validation
            if current_iteration % self._training_options.cv_summary_interval == 0:
                _log.info("Evaluating CV set...")

                # TODO: Make configurable?
                IMAGE_OUTPUT_COUNT = 5
                image_output_iterations = min(IMAGE_OUTPUT_COUNT, self._cv_pipeline.sample_count)

                # Generate individual summaries
                cv_summaries = []
                cv_images = []
                for _ in range(image_output_iterations):
                    current_summary, current_image = self._sess.run(
                        (self._cv_summary, self._concatenated_images_op))
                    cv_summaries.append(current_summary)
                    cv_images.append(current_image)

                for _ in range(image_output_iterations, self._cv_pipeline.sample_count):
                    cv_summaries.append(self._sess.run(self._cv_summary))

                # Convert summaries into numpy array and calculate the average for all tags
                # TODO: Validate that only scalar summaries are used
                summary_tags = [entry.tag for entry in tf.Summary.FromString(cv_summaries[0]).value]
                tag_index_mapping = {tag: idx for idx, tag in enumerate(summary_tags)}
                summary_values = np.zeros((len(cv_summaries), len(summary_tags)), dtype=np.float32)

                for idx, current_summary in enumerate(cv_summaries):
                    for entry in tf.Summary.FromString(current_summary).value:
                        summary_values[idx, tag_index_mapping[entry.tag]] = entry.simple_value

                real_summary = np.mean(summary_values, axis=0)

                # Create output summary proto
                value_list = [
                    tf.Summary.Value(tag=tag, simple_value=real_summary[idx])
                    for idx, tag in enumerate(summary_tags)]
                result_proto = tf.Summary(value=value_list)

                # Write summary
                self._cv_summary_writer.add_summary(result_proto, current_iteration)

                # Write images
                # TODO: Don't use hardcoded size
                for idx, current_image in enumerate(cv_images):
                    output_image = np.reshape(current_image, (512, 512 * 4, 3))
                    Image.fromarray(output_image, "RGB").save(
                        os.path.join(save_image_path, f"sample_{idx:02d}_{iteration:08d}.png"))

            # Train discriminator
            for _ in range(self._training_options.discriminator_iterations):
                self._sess.run(self._op_discriminator)

            # Train generator, optionally output summary
            if current_iteration % self._training_options.training_summary_interval == 0:
                _, current_summary = self._sess.run([self._op_generator, self._train_summary])
                self._train_summary_writer.add_summary(current_summary, iteration)
                _log.info(f"Iteration {iteration} done")
            else:
                self._sess.run(self._op_generator)

            # Check for iteration limit reached
            if iteration > self._training_options.max_iterations:
                coord.request_stop()

    except Exception as ex:
        coord.request_stop(ex)
    finally:
        coord.request_stop()
        _log.info("Waiting for threads to finish...")
        coord.join(threads)

        # Close writers AFTER threads stopped to make sure summaries are written
        self._train_summary_writer.close()
        self._cv_summary_writer.close()

    _log.info("Training finished")
def __init__(
        self, training_pipeline: InputPipeline, cv_pipeline: InputPipeline,
        network_factory: NetworkFactory, objective_factory: ObjectiveFactory,
        training_options: TrainingOptions, learning_rate: float,
        beta1: float = 0.9, beta2: float = 0.999):
    """
    Create a new network trainer.

    :param training_pipeline: Input pipeline used for training
    :param cv_pipeline: Input pipeline used for cross-validation
    :param network_factory: Factory to create training and evaluation networks
    :param objective_factory: Factory to create generator and discriminator losses
    :param training_options: Options controlling the training process
    :param learning_rate: Learning rate to use in the Adam optimizer
    :param beta1: Beta1 to use in the Adam optimizer
    :param beta2: Beta2 to use in the Adam optimizer
    """
    self._training_options = training_options
    self._restored_iteration = None

    # Create input pipelines
    with use_cpu():
        self._training_pipeline = training_pipeline
        self._train_x, self._train_y, _ = training_pipeline.create_pipeline()
        self._cv_pipeline = cv_pipeline
        self._cv_x, self._cv_y, _ = self._cv_pipeline.create_pipeline()

    # Create training graph
    with tf.name_scope("training"):
        # Create networks
        self._generator = network_factory.create_generator(
            self._train_x,
            use_gpu=self._training_options.use_gpu,
            data_format=self._training_options.data_format)
        self._discriminator_generated = network_factory.create_discriminator(
            self._train_x, self._generator,
            use_gpu=self._training_options.use_gpu,
            data_format=self._training_options.data_format)
        self._discriminator_real = network_factory.create_discriminator(
            self._train_x, self._train_y, reuse=True,
            use_gpu=self._training_options.use_gpu,
            data_format=self._training_options.data_format)

        # Create losses
        self._generator_loss, generator_summary = objective_factory.create_generator_loss(
            self._train_x, self._train_y, self._generator, self._discriminator_generated,
            use_gpu=self._training_options.use_gpu,
            data_format=self._training_options.data_format)
        self._discriminator_loss, discriminator_summary = objective_factory.create_discriminator_loss(
            self._train_x, self._train_y, self._generator,
            self._discriminator_generated, self._discriminator_real,
            use_gpu=self._training_options.use_gpu,
            data_format=self._training_options.data_format)

        with tf.device(select_device(self._training_options.use_gpu)):
            # Create optimizers, splitting variables by name prefix
            trainable_variables = tf.trainable_variables()
            variables_discriminator = [var for var in trainable_variables
                                       if var.name.startswith("discriminator")]
            variables_generator = [var for var in trainable_variables
                                   if var.name.startswith("generator")]

            self._optimizer_generator = tf.train.AdamOptimizer(
                learning_rate, beta1, beta2, name="adam_generator")
            self._optimizer_discriminator = tf.train.AdamOptimizer(
                learning_rate, beta1, beta2, name="adam_discriminator")

            self._op_generator = self._optimizer_generator.minimize(
                self._generator_loss, var_list=variables_generator)
            self._op_discriminator = self._optimizer_discriminator.minimize(
                self._discriminator_loss, var_list=variables_discriminator)

        with use_cpu():
            # Iteration counter
            self._global_step = tf.Variable(0, trainable=False, name="global_step", dtype=tf.int64)
            self._step_op = tf.assign_add(self._global_step, 1)

        # Create summary operations
        accuracy, precision, recall, f1_score, specificity, jaccard_similarity = \
            _create_summaries(self._generator, self._train_y)

        summary_operations = [
            tf.summary.scalar("accuracy", accuracy),
            tf.summary.scalar("precision", precision),
            tf.summary.scalar("recall", recall),
            tf.summary.scalar("f1_score", f1_score),
            tf.summary.scalar("specificity", specificity),
            tf.summary.scalar("jaccard_similarity", jaccard_similarity)
        ]

        self._train_saver = tf.train.Saver(keep_checkpoint_every_n_hours=1)

        # Merge summaries
        self._train_summary = tf.summary.merge(
            summary_operations + generator_summary + discriminator_summary)
        self._train_summary_writer = tf.summary.FileWriter(
            os.path.join(self._training_options.summary_directory, "training"),
            graph=tf.get_default_graph())

    # Create CV graph
    with tf.name_scope("cv"):
        # Create networks
        generator = network_factory.create_generator(
            self._cv_x, reuse=True,
            use_gpu=self._training_options.use_gpu,
            data_format=self._training_options.data_format)
        discriminator_generated = network_factory.create_discriminator(
            self._cv_x, generator, reuse=True,
            use_gpu=self._training_options.use_gpu,
            data_format=self._training_options.data_format)
        discriminator_real = network_factory.create_discriminator(
            self._cv_x, self._cv_y, reuse=True,
            use_gpu=self._training_options.use_gpu,
            data_format=self._training_options.data_format)

        # Create losses
        _, generator_summary = objective_factory.create_generator_loss(
            self._cv_x, self._cv_y, generator, discriminator_generated,
            use_gpu=self._training_options.use_gpu,
            data_format=self._training_options.data_format)
        _, discriminator_summary = objective_factory.create_discriminator_loss(
            self._cv_x, self._cv_y, generator, discriminator_generated, discriminator_real,
            use_gpu=self._training_options.use_gpu,
            data_format=self._training_options.data_format)

        # Create other summary operations
        accuracy, precision, recall, f1_score, specificity, jaccard_similarity = \
            _create_summaries(generator, self._cv_y)

        summary_operations = [
            tf.summary.scalar("accuracy", accuracy),
            tf.summary.scalar("precision", precision),
            tf.summary.scalar("recall", recall),
            tf.summary.scalar("f1_score", f1_score),
            tf.summary.scalar("specificity", specificity),
            tf.summary.scalar("jaccard_similarity", jaccard_similarity)
        ]

        with use_cpu():
            # Concatenated images
            self._concatenated_images_op = _create_concatenated_images(
                self._cv_x, self._cv_y, generator,
                self._cv_pipeline.color_converter,
                self._training_options.data_format)

        # Merge summaries
        self._cv_summary = tf.summary.merge(
            summary_operations + generator_summary + discriminator_summary)
        self._cv_summary_writer = tf.summary.FileWriter(
            os.path.join(self._training_options.summary_directory, "cv"))