def train(self, hparams_string):
    """Run training of the network.

    Builds the TFRecord input pipeline, the model, loss, optimizer and
    summary ops, then trains for ``epoch_max`` epochs. A loss summary is
    written after every optimisation step and a checkpoint is saved after
    every epoch. If a checkpoint exists in ``self.dir_checkpoints`` the
    variables are restored and training continues with the epoch following
    the one stored in the checkpoint name.

    Args:
        hparams_string: Hyperparameter string handed to
            ``hparams_parser_train``; the parsed result must provide
            ``batch_size`` and ``epoch_max``.

    Returns:
        None.
    """
    args_train = hparams_parser_train(hparams_string)

    self.batch_size = args_train.batch_size
    self.epoch_max = args_train.epoch_max

    utils.save_model_configuration(args_train, self.dir_base)

    # Use dataset for loading in datasamples from .tfrecord
    # (https://www.tensorflow.org/programmers_guide/datasets#consuming_tfrecord_data)
    # The iterator will get a new batch from the dataset each time a
    # sess.run() is executed on the graph.
    dataset = tf.data.TFRecordDataset(self.dateset_filenames)
    dataset = dataset.map(util_data.decode_image)  # decoding the tfrecord
    dataset = dataset.map(self._preProcessData)  # potential local preprocessing of data
    dataset = dataset.shuffle(buffer_size=10000, seed=None)
    dataset = dataset.batch(batch_size=self.batch_size)
    iterator = dataset.make_initializable_iterator()
    inputs = iterator.get_next()

    # depends on self._preProcessData
    [in_image, in_label] = inputs

    # define model, loss, optimizer and summaries.
    outputs = self._create_inference(in_image)
    loss = self._create_losses(outputs, in_label)
    optimizer_op = self._create_optimizer(loss)
    summary_op = self._create_summaries(loss)

    # show network architecture (called after the graph is built so that
    # the model variables exist when they are listed)
    utils.show_all_variables()

    checkpoint_prefix = os.path.join(self.dir_checkpoints, self.model + '.model')

    with tf.Session() as sess:
        # Initialize all model Variables.
        sess.run(tf.global_variables_initializer())

        # Create Saver object for loading and storing checkpoints
        saver = tf.train.Saver()

        # Create Writer object for storing graph and summaries for TensorBoard
        writer = tf.summary.FileWriter(self.dir_logs, sess.graph)

        # Reload Tensor values from latest checkpoint
        ckpt = tf.train.get_checkpoint_state(self.dir_checkpoints)
        epoch_start = 0
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            # The checkpoint suffix is the epoch that was completed when it
            # was written, so resume with the following epoch.
            epoch_start = int(ckpt_name.split('-')[-1]) + 1

        interationCnt = 0
        # Do training loops
        for epoch_n in range(epoch_start, self.epoch_max):

            # Initiate or Re-initiate iterator
            sess.run(iterator.initializer)

            # Test model output before any training
            if epoch_n == 0:
                summary = sess.run(summary_op)
                writer.add_summary(summary, global_step=-1)

            utils.show_message(
                'Running training epoch no: {0}'.format(epoch_n))

            while True:
                try:
                    _, summary = sess.run([optimizer_op, summary_op])
                    writer.add_summary(summary, global_step=interationCnt)
                    # Advance the step so each summary gets its own
                    # global_step in TensorBoard.
                    interationCnt += 1
                except tf.errors.OutOfRangeError:
                    # The iterator is exhausted: the epoch is finished.
                    # Do some evaluation after each Epoch
                    break

            # Save model variables to checkpoint after every epoch
            saver.save(sess, checkpoint_prefix, global_step=epoch_n)

        # Flush pending events to disk and release the event file.
        writer.close()
def train(self, hparams_string):
    """Run training of the network (discriminator / generator pair).

    Builds the TFRecord input pipeline and the placeholders, creates the
    model, losses, optimizers and summaries, and then alternates
    ``d_iter`` discriminator updates with one generator update until the
    dataset iterator is exhausted. At the end of every epoch (and once
    before any training when starting at epoch 0) test images are generated
    from a constant test input, written to TensorBoard and saved under
    ``<dir_results>/Training``. A checkpoint is saved every
    ``backup_frequency`` epochs.

    Args:
        hparams_string: Hyperparameter string handed to
            ``hparams_parser_train``; the parsed result must provide
            ``batch_size``, ``epoch_max``, ``unstructured_noise_dim``,
            ``lr_discriminator``, ``lr_generator``, ``d_iter``,
            ``n_testsamples``, ``class_scale_d``, ``class_scale_g`` and
            ``backup_frequency``.

    Returns:
        None.
    """
    args_train = hparams_parser_train(hparams_string)

    self.batch_size = args_train.batch_size
    self.epoch_max = args_train.epoch_max
    self.unstructured_noise_dim = args_train.unstructured_noise_dim
    self.d_learning_rate = args_train.lr_discriminator
    self.g_learning_rate = args_train.lr_generator
    self.d_iter = args_train.d_iter
    self.n_testsamples = args_train.n_testsamples
    self.class_scale_d = args_train.class_scale_d
    self.class_scale_g = args_train.class_scale_g
    self.backup_frequency = args_train.backup_frequency

    utils.save_model_configuration(args_train, self.dir_base)

    # Use dataset for loading in datasamples from .tfrecord
    # (https://www.tensorflow.org/programmers_guide/datasets#consuming_tfrecord_data)
    # The iterator will get a new batch from the dataset each time a
    # sess.run() is executed on the graph.
    dataset = tf.data.TFRecordDataset(self.dateset_filenames)
    dataset = dataset.map(util_data.decode_image)  # decoding the tfrecord
    dataset = dataset.map(self._genLatentCodes)
    dataset = dataset.shuffle(buffer_size=10000, seed=None)
    dataset = dataset.batch(batch_size=self.batch_size)
    iterator = dataset.make_initializable_iterator()
    input_getBatch = iterator.get_next()

    # Create input placeholders
    n_test_rows = self.n_testsamples * self.lbls_dim
    input_images = tf.placeholder(
        dtype=tf.float32,
        shape=[None] + self.image_dims,
        name='input_images')
    input_lbls = tf.placeholder(
        dtype=tf.float32,
        shape=[None, self.lbls_dim],
        name='input_lbls')
    input_unstructured_noise = tf.placeholder(
        dtype=tf.float32,
        shape=[None, self.unstructured_noise_dim],
        name='input_unstructured_noise')
    input_test_lbls = tf.placeholder(
        dtype=tf.float32,
        shape=[n_test_rows, self.lbls_dim],
        name='input_test_lbls')
    input_test_noise = tf.placeholder(
        dtype=tf.float32,
        shape=[n_test_rows, self.unstructured_noise_dim],
        name='input_test_noise')

    # Define model, loss, optimizer and summaries.
    logits_source, logits_class, _ = self._create_inference(
        input_images, input_lbls, input_unstructured_noise)
    loss_discriminator, loss_generator = self._create_losses(
        logits_source, logits_class, input_lbls)
    train_op_discriminator, train_op_generator = self._create_optimizer(
        loss_discriminator, loss_generator)
    summary_op_dloss, summary_op_gloss, summary_op_img, summary_img = self._create_summaries(
        loss_discriminator, loss_generator, input_test_noise, input_test_lbls)

    # show network architecture
    utils.show_all_variables()

    # create constant test variable to inspect changes in the model
    test_noise, test_lbls = self._genTestInput(self.lbls_dim, n_samples=self.n_testsamples)

    dir_results_train = os.path.join(self.dir_results, 'Training')
    utils.checkfolder(dir_results_train)

    checkpoint_prefix = os.path.join(self.dir_checkpoints, self.model + '.model')

    with tf.Session() as sess:
        # Initialize all model Variables.
        sess.run(tf.global_variables_initializer())

        # Create Saver object for loading and storing checkpoints
        saver = tf.train.Saver()

        # Create Writer object for storing graph and summaries for TensorBoard
        writer = tf.summary.FileWriter(self.dir_logs, sess.graph)

        def _log_test_images(step):
            """Generate images from the constant test input and store them for `step`."""
            summaryImg_tb, summaryImg = sess.run(
                [summary_op_img, summary_img],
                feed_dict={input_test_noise: test_noise,
                           input_test_lbls: test_lbls})
            writer.add_summary(summaryImg_tb, global_step=step)
            utils.save_image_local(summaryImg, dir_results_train, 'Epoch_' + str(step))

        # Reload Tensor values from latest checkpoint
        ckpt = tf.train.get_checkpoint_state(self.dir_checkpoints)
        epoch_start = 0
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            # Checkpoint suffix is the last completed epoch; resume after it.
            epoch_start = int(ckpt_name.split('-')[-1]) + 1

        interationCnt = 0
        for epoch_n in range(epoch_start, self.epoch_max):

            # Test model output before any training
            if epoch_n == 0:
                _log_test_images(-1)

            # Initiate or Re-initiate iterator
            sess.run(iterator.initializer)

            ### ----------------------------------------------------------
            ### Update model
            print(datetime.datetime.now(), '- Running training epoch no:', epoch_n)
            while True:
                try:
                    # Discriminator: d_iter updates, each on a fresh batch.
                    for _ in range(self.d_iter):
                        image_batch, lbl_batch, unst_noise_batch = sess.run(input_getBatch)
                        feed_train = {input_images: image_batch,
                                      input_lbls: lbl_batch,
                                      input_unstructured_noise: unst_noise_batch}

                        # Two fetches -> exactly two results to unpack.
                        _, summary_dloss = sess.run(
                            [train_op_discriminator, summary_op_dloss],
                            feed_dict=feed_train)

                    writer.add_summary(summary_dloss, global_step=interationCnt)

                    # Generator: one update on the last discriminator batch.
                    _, summary_gloss = sess.run(
                        [train_op_generator, summary_op_gloss],
                        feed_dict=feed_train)

                    writer.add_summary(summary_gloss, global_step=interationCnt)
                    interationCnt += 1

                except tf.errors.OutOfRangeError:
                    # Iterator exhausted: epoch finished. Test current model.
                    _log_test_images(epoch_n)
                    break

            # Save model variables to checkpoint
            if (epoch_n + 1) % self.backup_frequency == 0:
                saver.save(sess, checkpoint_prefix, global_step=epoch_n)

        # Flush pending events to disk and release the event file.
        writer.close()
def train(self, hparams_string):
    """Run training of the network.

    Builds the TFRecord input pipeline and the input placeholders, creates
    the model, loss, optimizer and summaries, optionally initialises the
    convolutional layers from a downloaded ImageNet checkpoint, and trains
    for ``epoch_max`` epochs. A loss summary is written after every
    optimisation step and a checkpoint is saved after every epoch. If a
    checkpoint exists in ``self.dir_checkpoints`` training continues with
    the epoch following the one stored in the checkpoint name.

    Args:
        hparams_string: Hyperparameter string handed to
            ``hparams_parser_train``; the parsed result must provide
            ``batch_size``, ``epoch_max``, ``use_imagenet`` and
            ``model_version``.

    Returns:
        None.

    Raises:
        ValueError: If ``use_imagenet`` is set and ``model_version`` is
            neither ``'VGG16'`` nor ``'VGG19'``.
    """
    args_train = hparams_parser_train(hparams_string)

    self.batch_size = args_train.batch_size
    self.epoch_max = args_train.epoch_max
    self.use_imagenet = args_train.use_imagenet
    self.model_version = args_train.model_version

    utils.save_model_configuration(args_train, self.dir_base)

    # Use dataset for loading in datasamples from .tfrecord
    # (https://www.tensorflow.org/programmers_guide/datasets#consuming_tfrecord_data)
    # The iterator will get a new batch from the dataset each time a
    # sess.run() is executed on the graph.
    dataset = tf.data.TFRecordDataset(self.dateset_filenames)
    dataset = dataset.map(util_data.decode_image)  # decoding the tfrecord
    dataset = dataset.map(self._preProcessData)  # potential local preprocessing of data
    dataset = dataset.shuffle(buffer_size=10000, seed=None)
    dataset = dataset.batch(batch_size=self.batch_size)
    iterator = dataset.make_initializable_iterator()
    input_getBatch = iterator.get_next()

    input_images = tf.placeholder(dtype=tf.float32,
                                  shape=[None] + self.image_dims,
                                  name='input_images')
    input_lbls = tf.placeholder(dtype=tf.float32,
                                shape=[None, self.lbls_dim],
                                name='input_lbls')

    # define model, loss, optimizer and summaries.
    output_logits = self._create_inference(input_images)
    loss = self._create_losses(output_logits, input_lbls)
    optimizer_op = self._create_optimizer(loss)
    summary_op = self._create_summaries(loss)

    # show network architecture
    utils.show_all_variables()

    init_fn = None
    if self.use_imagenet:
        # model_version -> (slim variable scope / checkpoint stem, download url)
        imagenet_models = {
            'VGG16': ('vgg_16',
                      "http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz"),
            'VGG19': ('vgg_19',
                      "http://download.tensorflow.org/models/vgg_19_2016_08_28.tar.gz"),
        }
        if self.model_version not in imagenet_models:
            raise ValueError(
                'No ImageNet checkpoint available for model_version = '
                + str(self.model_version))
        scope_name, url_imagenet_model = imagenet_models[self.model_version]

        path_imagenet_ckpt = os.path.join(self.dir_checkpoints, scope_name + '.ckpt')
        if not tf.gfile.Exists(path_imagenet_ckpt):
            utils.download_and_uncompress_tarball(
                url_imagenet_model, self.dir_checkpoints)

        variables_to_restore = slim.get_model_variables(scope_name)
        variables_to_restore = variables_to_restore[:-6]  # ignore fc layers
        init_fn = slim.assign_from_checkpoint_fn(
            path_imagenet_ckpt, variables_to_restore)

    checkpoint_prefix = os.path.join(self.dir_checkpoints, self.model + '.model')

    with tf.Session() as sess:
        # Initialize all model Variables.
        sess.run(tf.global_variables_initializer())

        if init_fn is not None:
            # Overwrite the random initialisation with ImageNet weights.
            init_fn(sess)

        # Create Saver object for loading and storing checkpoints
        saver = tf.train.Saver()

        # Create Writer object for storing graph and summaries for TensorBoard
        writer = tf.summary.FileWriter(self.dir_logs, sess.graph)

        # Reload Tensor values from latest checkpoint
        ckpt = tf.train.get_checkpoint_state(self.dir_checkpoints)
        epoch_start = 0
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            # The checkpoint suffix is the epoch that was completed when it
            # was written, so resume with the following epoch.
            epoch_start = int(ckpt_name.split('-')[-1]) + 1

        interationCnt = 0
        # Do training loops
        for epoch_n in range(epoch_start, self.epoch_max):

            # Initiate or Re-initiate iterator
            sess.run(iterator.initializer)

            utils.show_message(
                'Running training epoch no: {0}'.format(epoch_n))

            while True:
                try:
                    image_batch, lbl_batch = sess.run(input_getBatch)
                    _, summary_loss = sess.run(
                        [optimizer_op, summary_op],
                        feed_dict={input_images: image_batch,
                                   input_lbls: lbl_batch})

                    writer.add_summary(summary_loss, global_step=interationCnt)
                    # Advance the step so each summary gets its own
                    # global_step in TensorBoard.
                    interationCnt += 1

                except tf.errors.OutOfRangeError:
                    # The iterator is exhausted: the epoch is finished.
                    # Do some evaluation after each Epoch
                    break

            # Save model variables to checkpoint after every epoch
            saver.save(sess, checkpoint_prefix, global_step=epoch_n)

        # Flush pending events to disk and release the event file.
        writer.close()
def train(self, hparams_string, preprocessing_params='', preprocessing_eval_params=''):
    """Run training of the network with a validation pass after every epoch.

    Loads the dataset selected by ``self.dataset``, builds the training and
    (optional) validation input pipelines, creates the model, loss,
    optimizer and summaries, and then, for every epoch, runs one pass over
    the training set followed by one pass over the validation set. Per
    epoch, confusion matrices are saved as CSV in ``self.dir_logs``,
    summaries are written to the ``train`` / ``val`` log directories and a
    checkpoint is saved. If a checkpoint exists in ``self.dir_checkpoints``
    training continues with the epoch following the one stored in the
    checkpoint name.

    Args:
        hparams_string: Hyperparameter string handed to
            ``hparams_parser_train``.
        preprocessing_params: Preprocessing pipeline string for the training
            data; an empty string means no pipeline is configured.
        preprocessing_eval_params: Preprocessing pipeline string for the
            validation data; when empty, ``preprocessing_params`` is used
            instead (if given).

    Returns:
        None.

    Raises:
        ValueError: If ``self.dataset`` is not one of the known datasets.
        NotImplementedError: If ``optim_vars`` is neither ``'all'`` nor
            ``'non_restored'``.
    """
    args_train = hparams_parser_train(hparams_string)

    self.batch_size = args_train.batch_size
    self.epoch_max = args_train.epoch_max
    self.model_version = args_train.model_version
    pretrained_model_path = args_train.pretrained_model
    # Compare by value; identity comparison against a literal is unreliable.
    use_pretrained_model = pretrained_model_path != ''
    pretrain_exclude_input = args_train.pretrain_exclude_input
    pretrain_exclude_output = args_train.pretrain_exclude_output
    optim_vars = args_train.optim_vars
    args_train.preprocessing = preprocessing_params
    args_train.preprocessing_eval = preprocessing_eval_params

    print('Training parameters:')
    print(args_train)

    utils.save_model_configuration(args_train, self.dir_base)

    # Load dataset
    if self.dataset == 'PSD_Segmented':
        DS = DS_PSDs.Dataset()
    elif self.dataset == 'seeds_all':
        DS = DS_Seeds.Dataset()
    elif self.dataset == 'barley':
        DS = DS_Barley.Dataset()
    elif self.dataset == 'barley_abnormal':
        DS = DS_Barley_Abnormal.Dataset()
    elif self.dataset == 'barley_d0':
        DS = DS_Barley_D0.Dataset()
    elif self.dataset == 'barley_next':
        DS = DS_Barley_Next.Dataset()
    elif self.dataset == 'barley_next_stratified':
        DS = DS_Barley_Next_Stratified.Dataset()
    elif self.dataset == 'okra':
        DS = DS_Okra.Dataset()
    elif self.dataset == 'okra_abnormal':
        DS = DS_Okra_Abnormal.Dataset()
    elif self.dataset == 'okra_next':
        DS = DS_Okra_next.Dataset()
    elif self.dataset == 'okra_d0':
        DS = DS_Okra_D0.Dataset()
    else:
        raise ValueError('Unknown dataset: ' + str(self.dataset))

    tf_dataset_list, dataset_sizes = DS.get_dataset_list(
        data_source=args_train.data_source,
        data_folder=args_train.data_folder,
        shuffle_before_split=args_train.shuffle_before_split,
        shuffle_seed=args_train.shuffle_seed,
        group_before_split=args_train.group_before_split,
        validation_method=args_train.validation_method,
        holdout_split=args_train.holdout_split,
        cross_folds=10,
        cross_val_fold=None,
        cross_test_fold=0,
        shard_val=args_train.shard_val,
        shard_test=args_train.shard_test,
        stratify_training_set=args_train.stratify_training_set)

    # Store which files ended up in the training / validation / test split
    with tf.Session('') as tf_session:
        DS.save_dataset_filenames(os.path.join(self.dir_logs, 'filenames_training.txt'), tf_dataset_list[0], tf_session)
        DS.save_dataset_filenames(os.path.join(self.dir_logs, 'filenames_validation.txt'), tf_dataset_list[1], tf_session)
        DS.save_dataset_filenames(os.path.join(self.dir_logs, 'filenames_test.txt'), tf_dataset_list[2], tf_session)

    class_dicts = DS.get_class_dicts()
    num_classes = [len(class_dict) for class_dict in class_dicts]

    # Setup preprocessing pipeline
    preprocessing = preprocess_factory.preprocess_factory()
    if preprocessing_params != '':
        preprocessing.prep_pipe_from_string(preprocessing_params)

    with tf.name_scope('Training_dataset'):
        tf_dataset_train = tf_dataset_list[0]
        tf_dataset_train = tf_dataset_train.shuffle(buffer_size=10000, seed=None)
        tf_dataset_train = tf_dataset_train.map(DS._decode_from_TFexample)
        tf_dataset_train = tf_dataset_train.map(preprocessing.pipe)
        tf_dataset_train = tf_dataset_train.batch(batch_size=self.batch_size, drop_remainder=False)
        tf_dataset_train = tf_dataset_train.repeat(count=-1)  # -1 --> repeat indefinitely
        tf_dataset_train_iterator = tf_dataset_train.make_one_shot_iterator()
        input_getBatch = tf_dataset_train_iterator.get_next()

    # Setup preprocessing pipeline for the validation step
    preprocessing_eval = preprocess_factory.preprocess_factory()
    if preprocessing_eval_params != '':
        preprocessing_eval.prep_pipe_from_string(preprocessing_eval_params)
    elif preprocessing_params != '':
        # Use same preprocessing as training step, if it is not specified
        # for validation step
        preprocessing_eval.prep_pipe_from_string(preprocessing_params)
    # If no preprocessing is specified, don't do any preprocessing

    with tf.name_scope('Validation_dataset'):
        tf_dataset_val = tf_dataset_list[1]
        if tf_dataset_val is not None:
            tf_dataset_val = tf_dataset_val.map(DS._decode_from_TFexample)
            tf_dataset_val = tf_dataset_val.map(preprocessing_eval.pipe)
            tf_dataset_val = tf_dataset_val.batch(batch_size=self.batch_size, drop_remainder=False)
            tf_dataset_val = tf_dataset_val.repeat(count=-1)  # -1 --> repeat indefinitely
            tf_dataset_val_iterator = tf_dataset_val.make_one_shot_iterator()
            tf_input_getBatch_val = tf_dataset_val_iterator.get_next()

    # Define input and output layers
    input_images = tf.placeholder(
        dtype=tf.float32,
        shape=[None] + self.image_dims,
        name='input_images')
    # One label placeholder per model output
    input_lbls = [
        tf.placeholder(dtype=tf.uint8,
                       shape=[None, 1],
                       name='input_lbls' + str(i))
        for i in range(len(num_classes))
    ]
    tf_is_training = tf.placeholder(
        dtype=tf.bool,
        shape=(),
        name='is_training_flag')

    # define model and load pre-trained model
    output_logits, endpoints, input_layer_name, output_layer_names = self._create_inference(
        input_images,
        is_training=tf_is_training,
        num_classes=num_classes,
        global_pool=args_train.global_pool)
    if use_pretrained_model:
        exclude_layers = []
        if pretrain_exclude_input:
            # NOTE(review): `+=` extends the list element-wise; this is only
            # correct if input_layer_name is a list of names rather than a
            # single string - confirm against _create_inference.
            exclude_layers += input_layer_name
        if pretrain_exclude_output:
            exclude_layers += output_layer_names
        output_logits, model_vars_restored, model_vars_not_restored = self._load_pretrained_model(
            output_logits, pretrained_model_path, exclude_layers)
    else:
        model_vars_restored = []
        model_vars_not_restored = list(endpoints.values())

    # Setup loss function
    loss = self._create_losses(output_logits, input_lbls, num_classes)

    # Setup optimizer
    if optim_vars == 'all':
        variables_to_optimize = None
    elif optim_vars == 'non_restored':
        variables_to_optimize = model_vars_not_restored
    else:
        raise NotImplementedError('Value set for optim_vars not implemented. Value = ' + optim_vars)

    optimizer_op = self._create_optimizer(
        loss,
        variables_to_optimize=variables_to_optimize,
        learning_rate=args_train.learning_rate)

    # Setup summaries
    CMatsTrain = [CM.confusionmatrix(N_classes) for N_classes in num_classes]
    CMatsVal = [CM.confusionmatrix(N_classes) for N_classes in num_classes]
    tf_loss = tf.placeholder(tf.float32, name='loss_mean')
    tf_accuracies = []
    tf_recalls = []
    tf_precisions = []
    tf_F1s = []
    tf_cs_categories = []
    for i in range(len(num_classes)):
        tf_accuracies.append(tf.placeholder(dtype=tf.float32, name='Overview/Accuracy' + str(i)))
        with tf.name_scope('output_' + str(i)):
            tf_recall, tf_chart_recall = tf_custom_summaries.class_score_mmm('Recall')
            tf_recalls.append(tf_recall)
            tf_precision, tf_chart_precision = tf_custom_summaries.class_score_mmm('Precision')
            tf_precisions.append(tf_precision)
            tf_F1, tf_chart_F1 = tf_custom_summaries.class_score_mmm('F1')
            tf_F1s.append(tf_F1)
        tf_cs_categories.append(
            tf_custom_summaries.layout_pb2.Category(
                title='output' + str(i),
                chart=[tf_chart_F1, tf_chart_precision, tf_chart_recall]))
    summary_list = tf_accuracies
    summary_dict = {'Overview/loss': tf_loss}
    layout_summary = tf_custom_summaries.summary_lib.custom_scalar_pb(
        tf_custom_summaries.layout_pb2.Layout(category=tf_cs_categories))
    self._create_summaries(loss, summary_dict=summary_dict, summary_list=summary_list)
    tf_summary_op = tf.summary.merge_all()

    def _batch_feed(image_batch, lbl_batch, is_training):
        """Build the feed dict for one batch: column i of the labels feeds output i."""
        feed = {input_lbl: np.expand_dims(lbl_batch[:, i], 1)
                for i, input_lbl in enumerate(input_lbls)}
        feed[input_images] = image_batch
        feed[tf_is_training] = is_training
        return feed

    def _summary_feed(conf_mats, loss_mean):
        """Build the feed dict that fills the summary placeholders for one epoch."""
        def _nan_to_zero(values):
            return [0 if np.isnan(x) else x for x in values]

        feed = {tf_loss: loss_mean}
        for tf_acc, tf_rec, tf_pre, tf_f1, CMat in zip(
                tf_accuracies, tf_recalls, tf_precisions, tf_F1s, conf_mats):
            feed[tf_acc] = CMat.accuracy()
            feed[tf_rec] = _nan_to_zero(CMat.recall())
            feed[tf_pre] = _nan_to_zero(CMat.precision())
            feed[tf_f1] = _nan_to_zero(CMat.fScore(beta=1))
        return feed

    # Number of batches needed to see every example once per epoch. The
    # datasets repeat indefinitely, so the epoch length is set explicitly.
    n_batches_train = math.ceil(float(dataset_sizes[0]) / float(self.batch_size))
    n_batches_val = 0
    if tf_dataset_val is not None:
        n_batches_val = math.ceil(float(dataset_sizes[1]) / float(self.batch_size))

    checkpoint_prefix = os.path.join(self.dir_checkpoints, self.model + '.model')

    gpu_options = tf.GPUOptions(allow_growth=True)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # Initialize all model Variables.
        sess.run(tf.global_variables_initializer())

        # Create Saver object for loading and storing checkpoints
        saver = tf.train.Saver()

        # Create Writer object for storing graph and summaries for TensorBoard
        writer_train = tf.summary.FileWriter(os.path.join(self.dir_logs, 'train'), sess.graph)
        writer_validation = tf.summary.FileWriter(os.path.join(self.dir_logs, 'val'), sess.graph)
        writer_train.add_summary(layout_summary)
        writer_validation.add_summary(layout_summary)

        # Reload Tensor values from latest checkpoint
        ckpt = tf.train.get_checkpoint_state(self.dir_checkpoints)
        epoch_start = 0
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            # The checkpoint suffix is the epoch that was completed when it
            # was written, so resume with the following epoch.
            epoch_start = int(ckpt_name.split('-')[-1]) + 1

        # Do training loops
        for epoch_n in range(epoch_start, self.epoch_max):

            #################
            # Training step #
            #################
            utils.show_message('Running training epoch no: {0}'.format(epoch_n), lvl=1)
            # Reset confusion matrices and accumulated loss
            for CMat in CMatsTrain:
                CMat.Reset()
            loss_train = 0
            # Loop through all batches of examples
            for batchCounter in range(n_batches_train):
                # Grab an image and label batch from the training set
                image_batch, lbl_batch, *_ = sess.run(input_getBatch)

                # Perform training step
                _, loss_out, lbl_batch_predict = sess.run(
                    [optimizer_op, loss, output_logits],
                    feed_dict=_batch_feed(image_batch, lbl_batch, True))
                loss_train += loss_out

                # Store results from training step
                # Calculate confusion matrix for all outputs
                for i, CMat in enumerate(CMatsTrain):
                    lbl_idx = lbl_batch[:, i]
                    lbl_idx_predict = np.squeeze(np.argmax(lbl_batch_predict[i], axis=3))
                    CMat.Append(lbl_idx, lbl_idx_predict)

                # Show progress in stdout
                self._show_progress('TR', epoch_n, batchCounter, n_batches_train - 1, loss_out, CMatsTrain)

            # Print accumulated confusion matrix for each output
            print('\n')
            for i, CMat in enumerate(CMatsTrain):
                CMat.Save(os.path.join(self.dir_logs, 'ConfMat_Train_output' + '{:02d}'.format(i) + '.csv'), 'csv')
                print(CMat)

            # Mean loss over the number of batches (not the last loop
            # index, which is one too small and zero for a single batch).
            loss_train = loss_train / max(n_batches_train, 1)
            summaries = sess.run(tf_summary_op,
                                 feed_dict=_summary_feed(CMatsTrain, loss_train))

            # Write summaries to training log
            writer_train.add_summary(summaries, global_step=epoch_n)

            ###################
            # Validation step #
            ###################
            if tf_dataset_val is not None:  # Skip validation step, if there is no validation dataset
                utils.show_message('Running validation epoch no: {0}'.format(epoch_n), lvl=1)
                # Reset confusion matrices and accumulated loss
                for CMat in CMatsVal:
                    CMat.Reset()
                loss_val = 0
                # Loop through all batches of examples
                for batchCounter in range(n_batches_val):
                    # Grab an image and label batch from the validation set
                    image_batch, lbl_batch, *_ = sess.run(tf_input_getBatch_val)

                    # Perform evaluation step
                    lbl_batch_predict, loss_out = sess.run(
                        [output_logits, loss],
                        feed_dict=_batch_feed(image_batch, lbl_batch, False))

                    # Store results from evaluation step
                    # Calculate confusion matrix for all outputs
                    for i, CMat in enumerate(CMatsVal):
                        lbl_idx = lbl_batch[:, i]
                        lbl_idx_predict = np.squeeze(np.argmax(lbl_batch_predict[i], axis=3))
                        CMat.Append(lbl_idx, lbl_idx_predict)

                    loss_val += loss_out
                    # Show progress in stdout
                    self._show_progress('VA', epoch_n, batchCounter, n_batches_val - 1, loss_out, CMatsVal)

                # Print confusion matrix for each output
                print('\n')
                for i, CMat in enumerate(CMatsVal):
                    # Save confusion matrix
                    CMat.Save(os.path.join(self.dir_logs, 'ConfMat_Val_output' + '{:02d}'.format(i) + '.csv'), 'csv')
                    print(CMat)

                # Mean loss over the number of validation batches
                loss_val = loss_val / max(n_batches_val, 1)
                summaries = sess.run(tf_summary_op,
                                     feed_dict=_summary_feed(CMatsVal, loss_val))

                # Write summaries to validation log
                writer_validation.add_summary(summaries, global_step=epoch_n)

            # Save checkpoint for this epoch
            saver.save(sess, checkpoint_prefix, global_step=epoch_n)

        # Flush pending events to disk and release the event files.
        writer_train.close()
        writer_validation.close()
def train(self, hparams_string):
    """Run training of the network (discriminator / generator with latent info variables).

    Selects the training shards, builds the TFRecord input pipeline and the
    placeholders, creates the model, losses, optimizers and summaries, and
    then alternates ``d_iter`` discriminator updates with one generator
    update until the dataset iterator is exhausted or a batch smaller than
    ``batch_size`` is received. At the end of every epoch test images are
    generated for every class and every test info input and saved under
    ``<dir_results>/Training/<class>``; when training starts at epoch 0 a
    snapshot of the untrained model is saved first with epoch label ``-1``.
    A checkpoint is saved every ``backup_frequency`` epochs.

    Args:
        hparams_string: Hyperparameter string handed to
            ``hparams_parser_train``; the parsed result must provide
            ``batch_size``, ``epoch_max``, ``unstructured_noise_dim``,
            ``info_var_dim``, ``n_testsamples``, ``lr_discriminator``,
            ``lr_generator``, ``d_iter``, ``gp_lambda``, ``class_scale_d``,
            ``class_scale_g``, ``info_scale_d``, ``info_scale_g``,
            ``backup_frequency`` and ``shards_idx_test``.

    Returns:
        None.
    """
    args_train = hparams_parser_train(hparams_string)

    self.batch_size = args_train.batch_size
    self.epoch_max = args_train.epoch_max
    self.unstructured_noise_dim = args_train.unstructured_noise_dim
    self.info_var_dim = args_train.info_var_dim
    self.n_testsamples = args_train.n_testsamples
    self.d_learning_rate = args_train.lr_discriminator
    self.g_learning_rate = args_train.lr_generator
    self.d_iter = args_train.d_iter
    self.gp_lambda = args_train.gp_lambda
    self.class_scale_d = args_train.class_scale_d
    self.class_scale_g = args_train.class_scale_g
    self.info_scale_d = args_train.info_scale_d
    self.info_scale_g = args_train.info_scale_g
    self.backup_frequency = args_train.backup_frequency
    self.shards_idx_test = args_train.shards_idx_test

    utils.save_model_configuration(args_train, self.dir_base)

    # Create folder for saving training results (one sub-folder per class)
    dir_results_train = os.path.join(self.dir_results, 'Training')
    utils.checkfolder(dir_results_train)
    for class_n in range(self.lbls_dim):
        utils.checkfolder(dir_results_train + '/' + str(class_n).zfill(2))

    if 0 in self.shards_idx_test:
        # A 0 among the test shard indices selects all shards for training.
        dataset_filenames = self.dataset_filenames
    else:
        # Test shard indices are shifted down by one before being used as
        # list indices; the remaining shards are used for training.
        self.shards_idx_test = np.subtract(self.shards_idx_test, 1)
        shards_idx_training = np.delete(range(len(self.dataset_filenames)),
                                        self.shards_idx_test)
        dataset_filenames = [self.dataset_filenames[i] for i in shards_idx_training]

    utils.show_message('Training Data:')
    print(dataset_filenames)

    # Setup preprocessing pipeline
    preprocessing = preprocess_factory.preprocess_factory()

    # Dataset specific preprocessing ('MNIST' and 'PSD_Nonsegmented' use none)
    if self.dataset == 'PSD_Segmented':
        preprocessing.prep_pipe_from_string(
            "pad_to_size;{'height': 566, 'width': 566, 'constant': -1.0};random_rotation;{};crop_to_size;{'height': 400, 'width': 400};resize;{'height': 128, 'width': 128}"
        )

    # Use dataset for loading in datasamples from .tfrecord
    # (https://www.tensorflow.org/programmers_guide/datasets#consuming_tfrecord_data)
    # The iterator will get a new batch from the dataset each time a
    # sess.run() is executed on the graph.
    dataset = tf.data.TFRecordDataset(dataset_filenames)
    dataset = dataset.shuffle(buffer_size=10000, seed=None)
    dataset = dataset.map(util_data.decode_image)  # decoding the tfrecord
    dataset = dataset.map(self._genLatentCodes)  # preprocess data and perform augmentation
    dataset = dataset.map(preprocessing.pipe)
    dataset = dataset.batch(batch_size=self.batch_size)
    iterator = dataset.make_initializable_iterator()
    input_getBatch = iterator.get_next()

    # Number of rows in every test input
    n_test_rows = self.n_testsamples**np.minimum(2, self.info_var_dim)

    # Create input placeholders
    input_images = tf.placeholder(dtype=tf.float32,
                                  shape=[self.batch_size] + self.image_dims,
                                  name='input_images')
    input_lbls = tf.placeholder(dtype=tf.float32,
                                shape=[None, self.lbls_dim],
                                name='input_lbls')
    input_unstructured_noise = tf.placeholder(
        dtype=tf.float32,
        shape=[None, self.unstructured_noise_dim],
        name='input_unstructured_noise')
    input_info_noise = tf.placeholder(dtype=tf.float32,
                                      shape=[None, self.info_var_dim],
                                      name='input_info_noise')
    input_test_lbls = tf.placeholder(dtype=tf.float32,
                                     shape=[n_test_rows, self.lbls_dim],
                                     name='input_test_lbls')
    input_test_noise = tf.placeholder(
        dtype=tf.float32,
        shape=[n_test_rows, self.unstructured_noise_dim],
        name='input_test_noise')
    input_test_info_noise = tf.placeholder(
        dtype=tf.float32,
        shape=[n_test_rows, self.info_var_dim],
        name='input_test_info_noise')

    # Define model, loss, optimizer and summaries.
    logits_source, logits_class, logits_info, artificial_images = self._create_inference(
        input_images, input_lbls, input_unstructured_noise, input_info_noise)
    loss_discriminator, loss_generator = self._create_losses(
        logits_source, logits_class, logits_info, artificial_images,
        input_lbls, input_info_noise)
    train_op_discriminator, train_op_generator = self._create_optimizer(
        loss_discriminator, loss_generator)
    summary_op_dloss, summary_op_gloss, summary_op_img, summary_img = self._create_summaries(
        loss_discriminator, loss_generator, input_test_noise,
        input_test_lbls, input_test_info_noise)

    # show network architecture
    utils.show_all_variables()

    # create constant test variable to inspect changes in the model
    self.combinations_info_var = list(
        itertools.combinations(range(self.info_var_dim), 2))
    test_noise, test_info = self._genTestInput()

    checkpoint_prefix = os.path.join(self.dir_checkpoints, self.model + '.model')

    with tf.Session() as sess:
        # Initialize all model Variables.
        sess.run(tf.global_variables_initializer())

        # Create Saver object for loading and storing checkpoints
        saver = tf.train.Saver(max_to_keep=500)

        # Create Writer object for storing graph and summaries for TensorBoard
        writer = tf.summary.FileWriter(self.dir_logs, sess.graph)

        def _save_test_images(epoch_label):
            """Generate and save test images for every class and test info input."""
            for class_n in range(self.lbls_dim):
                # One-hot labels selecting class_n for every test row
                test_lbls = np.zeros([n_test_rows, self.lbls_dim])
                test_lbls[:, class_n] = 1
                dir_result_train_class = dir_results_train + '/' + str(class_n).zfill(2)

                for i, test_info_combi in enumerate(test_info):
                    _, summaryImg = sess.run(
                        [summary_op_img, summary_img],
                        feed_dict={
                            input_test_noise: test_noise,
                            input_test_lbls: test_lbls,
                            input_test_info_noise: test_info_combi
                        })

                    if self.info_var_dim < 2:
                        filename_temp = 'Epoch_{0}_LatentVar_1'.format(epoch_label)
                    else:
                        filename_temp = 'Epoch_{0}_LatentCombi_{1}_{2}'.format(
                            epoch_label,
                            self.combinations_info_var[i][0],
                            self.combinations_info_var[i][1])

                    utils.save_image_local(summaryImg, dir_result_train_class, filename_temp)

        # Reload Tensor values from latest checkpoint
        ckpt = tf.train.get_checkpoint_state(self.dir_checkpoints)
        epoch_start = 0
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            # Checkpoint suffix is the last completed epoch; resume after it.
            epoch_start = int(ckpt_name.split('-')[-1]) + 1

        interationCnt = 0
        for epoch_n in range(epoch_start, self.epoch_max):

            # Test model output before any training. Labelled -1 so the
            # snapshot is not overwritten by the images saved at the end
            # of epoch 0.
            if epoch_n == 0:
                _save_test_images(-1)

            # Initiate or Re-initiate iterator
            sess.run(iterator.initializer)

            ### ----------------------------------------------------------
            ### Update model
            if (np.mod(epoch_n, 100) == 0) or epoch_n < 25:
                utils.show_message(
                    'Running training epoch no: {0}'.format(epoch_n))

            while True:
                try:
                    # Discriminator: d_iter updates, each on a fresh batch.
                    for _ in range(self.d_iter):
                        image_batch, lbl_batch, unst_noise_batch, info_noise_batch = sess.run(
                            input_getBatch)

                        # input_images has a fixed batch dimension, so a
                        # smaller final batch ends the epoch.
                        # NOTE(review): relies on a module-level
                        # OutOfRangeError that can be raised without
                        # arguments - confirm where it is defined.
                        if image_batch.shape[0] != self.batch_size:
                            raise OutOfRangeError

                        feed_train = {
                            input_images: image_batch,
                            input_lbls: lbl_batch,
                            input_unstructured_noise: unst_noise_batch,
                            input_info_noise: info_noise_batch
                        }

                        _, summary_dloss = sess.run(
                            [train_op_discriminator, summary_op_dloss],
                            feed_dict=feed_train)

                    writer.add_summary(summary_dloss, global_step=interationCnt)

                    # Generator: one update on the last discriminator batch.
                    _, summary_gloss = sess.run(
                        [train_op_generator, summary_op_gloss],
                        feed_dict=feed_train)

                    writer.add_summary(summary_gloss, global_step=interationCnt)
                    interationCnt += 1

                except (tf.errors.OutOfRangeError, OutOfRangeError):
                    # Epoch finished. Test current model.
                    _save_test_images(epoch_n)
                    break

            # Save model variables to checkpoint
            if (epoch_n + 1) % self.backup_frequency == 0:
                saver.save(sess, checkpoint_prefix, global_step=epoch_n)

        # Flush pending events to disk and release the event file.
        writer.close()