def get_data(self, b_size, augment=False, standardize=False, repeat=False, num_threads=4, seed=None):
    """
    Returns iterators on the dataset along with their initializers.
    :param b_size: batch size
    :param augment: (bool) whether to perform data augmentation
    :param standardize: (bool) whether to standardize the input data
    :param repeat: (bool) whether to repeat the input indefinitely
    :param num_threads: number of parallel calls for the map operations
    :param seed: (int or placeholder) seed for the random operations
    :return: train_init, valid_init, input_data, output_data
    """
    with tf.name_scope('acdc_data'):

        _train_images = tf.constant(self.x_train, dtype=tf.float32)
        _train_masks = tf.constant(self.y_train, dtype=tf.float32)
        _valid_images = tf.constant(self.x_validation, dtype=tf.float32)
        _valid_masks = tf.constant(self.y_validation, dtype=tf.float32)

        train_data = tf.data.Dataset.from_tensor_slices((_train_images, _train_masks))
        valid_data = tf.data.Dataset.from_tensor_slices((_valid_images, _valid_masks))

        train_data = train_data.shuffle(buffer_size=len(self.x_train), seed=seed)

        if standardize:
            print("Data won't be standardized, as they already have been pre-processed.")

        if augment:
            train_data = train_data.map(lambda x, y: self._data_augmentation_ops(x, y),
                                        num_parallel_calls=num_threads)

        seed2 = seed + 1
        train_data = train_data.shuffle(buffer_size=len(self.x_train), seed=seed2)

        if repeat:
            print_yellow_text(' --> Repeat the input indefinitely = True', sep=False)
            train_data = train_data.repeat()  # repeat the input indefinitely

        train_data = train_data.batch(b_size, drop_remainder=True)
        valid_data = valid_data.batch(b_size, drop_remainder=True)

        # if len(get_available_gpus()) > 0:
        #     # prefetch data to the GPU
        #     # train_data = train_data.apply(tf.data.experimental.prefetch_to_device("/gpu:0"))
        #     train_data = train_data.apply(tf.data.experimental.copy_to_device("/gpu:0")).prefetch(1)

        iterator = tf.data.Iterator.from_structure(train_data.output_types, train_data.output_shapes)
        _input_data, _output_data = iterator.get_next()

        train_init = iterator.make_initializer(train_data)  # initializer for train_data
        valid_init = iterator.make_initializer(valid_data)  # initializer for valid_data

        with tf.name_scope('input_sup'):
            input_data = tf.reshape(_input_data, shape=[-1, self.input_size[0], self.input_size[1], 1])
            input_data = tf.cast(input_data, tf.float32)

        with tf.name_scope('output_sup'):
            output_data = tf.reshape(_output_data, shape=[-1, self.input_size[0], self.input_size[1], 4])
            output_data = tf.cast(output_data, tf.float32)

        return train_init, valid_init, input_data, output_data
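# --- Usage sketch (not part of the original code) ---------------------------
# Minimal example of driving the re-initializable iterator returned by
# get_data() above. The names `dataset` (an instance of the class defining
# get_data) and `num_epochs` are hypothetical, introduced for illustration;
# only the train_init/valid_init switching mirrors the code above.
import tensorflow as tf

train_init, valid_init, x, y = dataset.get_data(b_size=16, augment=True, seed=1)

num_epochs = 10
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(num_epochs):
        sess.run(train_init)  # point the shared iterator at the training set
        while True:
            try:
                batch_x, batch_y = sess.run([x, y])  # consume one training batch
            except tf.errors.OutOfRangeError:
                break  # training set exhausted (repeat=False)
        sess.run(valid_init)  # re-initialize the same iterator on the validation set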
def main():
    print('\nBuilding SUPERVISED sets.')
    build_sup_sets()

    print('\nBuilding UNSUPERVISED sets.')
    build_unsup_sets()

    print('\nBuilding DISCRIMINATOR sets.')
    build_disc_sets()

    print_yellow_text('\nDone.\n', sep=False)
def main():
    print('\nBuilding SUPERVISED sets.')
    # build_sup_sets()

    print('\nBuilding UNSUPERVISED sets.')
    build_unsup_sets()

    print('\nBuilding DISCRIMINATOR sets.')
    build_disc_sets()  # texture is created from the discriminator dataset, keep the same slice order

    print_yellow_text('\nDone.\n', sep=False)
def train(self, n_epochs):
    """ The train function alternates between training one epoch and evaluating. """
    print("\nStarting network training... Number of epochs to train: \033[94m{0}\033[0m".format(n_epochs))
    print("Tensorboard verbose mode: \033[94m{0}\033[0m".format(self.tensorboard_verbose))
    print("Tensorboard dir: \033[94m{0}\033[0m".format(self.graph_dir))
    print("Data augmentation: \033[94m{0}\033[0m, Data standardization: \033[94m{1}\033[0m."
          .format(self.augment, self.standardize))

    utils.safe_mkdir(self.checkpoint_dir)
    utils.safe_mkdir(self.history_log_dir)
    writer = tf.summary.FileWriter(self.graph_dir, tf.get_default_graph())

    # config for the session: allow growth for GPU to avoid OOM when other processes are running
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        # to continue the last training
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(os.path.dirname(self.last_checkpoint_dir + '/checkpoint'))
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            # to train the main model from scratch: restore the pre-trained auto-encoders
            variables = tf.contrib.framework.get_variables_to_restore()
            variables_to_restore = [v for v in variables
                                    if v.name.split('/')[0] == 'texture_ae'
                                    or v.name.split('/')[0] == 'label_ae']
            ae_saver = tf.train.Saver(variables_to_restore)  # keep_checkpoint_every_n_hours=2
            ckpt_ae = tf.train.get_checkpoint_state(os.path.dirname(self.ae_checkpoint_dir + '/checkpoint'))
            if ckpt_ae and ckpt_ae.model_checkpoint_path:
                ae_saver.restore(sess, ckpt_ae.model_checkpoint_path)

        trained_epochs = self.g_epoch.eval()  # global step is also saved in the checkpoint
        print("Model already trained for \033[94m{0}\033[0m epochs.".format(trained_epochs))
        t_step = self.g_train_step.eval()  # global step for train
        v_step = self.g_valid_step.eval()  # global step for validation
        test_step = self.g_test_step.eval()  # global step for test

        # define a caller to call the callbacks
        self.callbacks_kwargs.update({'sess': sess, 'cnn': self})
        caller = tf_callbacks.ChainCallback(callbacks=self.callbacks)
        caller.on_train_begin(training_state=True, **self.callbacks_kwargs)

        # trick to find performance bugs: this will raise an exception if any new node is
        # inadvertently added to the graph, ensuring the same node is never added multiple
        # times (which could be expensive):
        tf.get_default_graph().finalize()

        # saving callback:
        self.callbacks_kwargs['es_loss'] = 100  # arbitrary initialization for the early-stopping loss

        for epoch in range(n_epochs):
            ep_str = str(epoch + 1) if (trained_epochs == 0) \
                else '({0}+) '.format(trained_epochs) + str(epoch + 1)
            print('_' * 40 + '\n\033[1;33mEPOCH {0}\033[0m - {1} : '.format(ep_str, self.run_id))
            caller.on_epoch_begin(training_state=True, **self.callbacks_kwargs)

            global_ep = sess.run(self.g_epoch)
            self.callbacks_kwargs['es_loss'] = sess.run(self.best_val_loss)
            seed = global_ep

            # TRAINING ------------------------------------------
            iterator_init_list = [self.disc_train_init, self.unsup_train_init]
            t_step = self.train_one_epoch(sess, iterator_init_list, writer, t_step, caller, seed)

            # VALIDATION ----------------------------------------
            if global_ep >= 15 or not ((global_ep + 1) % 5):  # when to evaluate the model
                iterator_init_list = [self.disc_valid_init, self.unsup_valid_init]
                v_step, val_loss = self.eval_once(sess, iterator_init_list, writer, v_step, caller)
                self.callbacks_kwargs['es_loss'] = val_loss
                sess.run(self.update_best_val_loss, feed_dict={'best_val_loss_value:0': val_loss})

            if self.tensorboard_verbose and (global_ep % 20 == 0):
                # writing summary for the weights:
                summary = sess.run(self.weights_summary)
                writer.add_summary(summary, global_step=t_step)

            try:
                caller.on_epoch_end(training_state=True, **self.callbacks_kwargs)
            except EarlyStoppingException:
                utils.print_yellow_text('\nEarly stopping...\n')
                break
            except NeedForTestException:
                # found a new best model, save it
                saver.save(sess, self.checkpoint_dir + '/checkpoint', t_step)

        caller.on_train_end(training_state=True, **self.callbacks_kwargs)

        # end of the training: save the current weights in a new sub-directory
        utils.safe_mkdir(self.checkpoint_dir + '/last_model')
        saver.save(sess, self.checkpoint_dir + '/last_model/checkpoint', t_step)

        # load the best model and run a test:
        ckpt = tf.train.get_checkpoint_state(os.path.dirname(self.checkpoint_dir + '/checkpoint'))
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        _ = self.test_once(sess, [self.disc_test_init, self.unsup_test_init], writer, test_step, caller)

        writer.close()
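# --- Callback sketch (not part of the original code) ------------------------
# The loop above steers its control flow through exceptions raised by the
# callbacks in tf_callbacks. The actual project implementation is not shown
# here; the class below is a hypothetical sketch of one plausible shape,
# assuming the 'es_loss' value passed via callbacks_kwargs is the metric
# being monitored.
class EarlyStoppingException(Exception):
    """Raised by a callback to stop the training loop."""

class NeedForTestException(Exception):
    """Raised by a callback when a new best model should be checkpointed."""

class EarlyStoppingCallback:
    def __init__(self, patience=20):
        self.patience = patience
        self.best = float('inf')
        self.wait = 0

    def on_epoch_end(self, training_state, **kwargs):
        loss = kwargs['es_loss']
        if loss < self.best:
            self.best = loss
            self.wait = 0
            raise NeedForTestException  # new best model: ask the loop to save a checkpoint
        self.wait += 1
        if self.wait > self.patience:
            raise EarlyStoppingException  # no improvement for `patience` epochs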
                37, 50, 53, 100, 38, 19, 61, 74, 97, 31, 91, 35, 56, 94, 26, 69, 46, 59, 4, 89,
                71, 6, 52, 43, 45, 63, 93, 14, 98, 88, 21, 28, 99, 54, 90, 2, 76, 34, 85, 70,
                86, 3, 8, 51, 40, 7, 13, 47, 55, 12, 58, 87, 9, 65, 62, 33, 42, 23, 92, 29,
                11, 83, 68, 75, 67, 16, 48, 66, 20, 15
            ]
        }
    ]
    return splits


if __name__ == '__main__':
    default_data_folder = 'training'

    print_yellow_text('\nSplitting the data in: train, validation, test ...', sep=False)

    # create split
    splits_ids = get_splits()[0]

    subdir_list = [d for d in glob(root + default_data_folder + '/*')]

    dset_list = ['train_sup', 'train_disc', 'train_unsup', 'validation', 'test']
    for dset in dset_list:
        try:
            os.makedirs(root + dset)
        except FileExistsError:
            os.system('rm -rf {0}'.format(root + dset))
            os.makedirs(root + dset)
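# --- Hypothetical continuation sketch (not part of the original code) -------
# The rest of the original split script is not shown above. Purely as an
# illustration of how the split could be applied, the sketch below moves each
# patient folder into the directory named by its split. The key names in
# splits_ids and the 'patientXXX' folder naming are assumptions.
import shutil

for dset, id_list in splits_ids.items():
    for p_id in id_list:
        src = root + default_data_folder + '/patient{0:03d}'.format(p_id)
        shutil.move(src, root + dset)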
def get_data(self, b_size, augment=False, standardize=False, repeat=False, num_threads=4, seed=None):
    """
    Returns iterators on the dataset along with their initializers.
    :param b_size: batch size
    :param augment: (bool) whether to perform data augmentation
    :param standardize: (bool) whether to standardize the input data
    :param repeat: (bool) whether to repeat the input indefinitely
    :param num_threads: number of parallel calls for the map operations
    :param seed: (int or placeholder) seed for the random operations
    :return: train_init, valid_init, input_data, output_data
    """
    with tf.name_scope('acdc_data'):

        _train_data = tf.constant(self.x_train_paths)
        _valid_data = tf.constant(self.x_validation_paths)

        train_data = tf.data.Dataset.from_tensor_slices(_train_data)
        valid_data = tf.data.Dataset.from_tensor_slices(_valid_data)

        train_data = train_data.shuffle(buffer_size=len(self.x_train_paths), seed=seed)

        train_data = train_data.map(
            lambda filename: tf.py_func(  # parse the record into tensors
                self.data_parser,
                [filename, standardize, augment],
                [tf.float32]),
            num_parallel_calls=num_threads)

        valid_data = valid_data.map(
            lambda filename: tf.py_func(  # parse the record into tensors
                self.data_parser,
                [filename, standardize, False],
                [tf.float32]),
            num_parallel_calls=num_threads)

        # - - - - - - - - - - - - - - - - - - - -

        if augment:
            train_data = train_data.map(self._data_augmentation_ops, num_parallel_calls=num_threads)
            valid_data = valid_data.map(lambda v: tf.cast(v, dtype=tf.float32),
                                        num_parallel_calls=num_threads)

        seed2 = seed + 1
        train_data = train_data.shuffle(buffer_size=len(self.x_train_paths), seed=seed2)

        if repeat:
            print_yellow_text(' --> Repeat the input indefinitely = True', sep=False)
            train_data = train_data.repeat()  # repeat the input indefinitely

        # un-batch first, then batch the data
        train_data = train_data.apply(tf.data.experimental.unbatch())
        valid_data = valid_data.apply(tf.data.experimental.unbatch())
        train_data = train_data.batch(b_size, drop_remainder=True)
        valid_data = valid_data.batch(b_size, drop_remainder=True)

        # if len(get_available_gpus()) > 0:
        #     # prefetch data to the GPU
        #     # train_data = train_data.apply(tf.data.experimental.prefetch_to_device("/gpu:0"))
        #     train_data = train_data.apply(tf.data.experimental.copy_to_device("/gpu:0")).prefetch(1)

        iterator = tf.data.Iterator.from_structure(train_data.output_types, train_data.output_shapes)
        _input_data = iterator.get_next()

        train_init = iterator.make_initializer(train_data)  # initializer for train_data
        valid_init = iterator.make_initializer(valid_data)  # initializer for valid_data

        with tf.name_scope('input_unsup'):
            input_data = tf.reshape(_input_data, shape=[-1, self.input_size[0], self.input_size[1], 1])

        with tf.name_scope('output_unsup'):
            output_data = tf.reshape(_input_data, shape=[-1, self.input_size[0], self.input_size[1], 1])

        return train_init, valid_init, input_data, output_data
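# --- Parser contract sketch (not part of the original code) -----------------
# tf.py_func returns tensors with unknown static shape, and the pipeline above
# applies unbatch() afterwards: the parser is therefore expected to return a
# whole stack of slices per file, which unbatch() flattens into individual 2D
# samples before re-batching. The real data_parser is defined elsewhere in the
# repository; the function below is a hypothetical sketch of that contract,
# assuming the paths point to .npy volumes of shape [n_slices, height, width].
import numpy as np

def data_parser(self, filename, standardize=False, augment=False):
    # filename arrives as a bytes object when called through tf.py_func
    volume = np.load(filename.decode('utf-8'))
    # return one float32 stack of slices; unbatch() splits it into 2D samples
    return volume.astype(np.float32)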