Esempio n. 1
0
    def train(self, hparams_string):
        """Run training of the network.

        Builds a ``tf.data`` input pipeline from ``self.dateset_filenames``,
        creates the model, loss, optimizer and summary ops, and trains until
        ``epoch_max`` is reached. A summary is written for every training
        batch and a checkpoint is saved after every epoch. If a checkpoint is
        found in ``self.dir_checkpoints``, training resumes at the epoch
        following the one stored in the checkpoint name.

        Args:
            hparams_string: Hyperparameter string handed to
                ``hparams_parser_train``. The parsed result must provide
                ``batch_size`` and ``epoch_max``.

        Returns:
            None.
        """
        args_train = hparams_parser_train(hparams_string)

        self.batch_size = args_train.batch_size
        self.epoch_max = args_train.epoch_max

        utils.save_model_configuration(args_train, self.dir_base)

        # Use dataset for loading in datasamples from .tfrecord (https://www.tensorflow.org/programmers_guide/datasets#consuming_tfrecord_data)
        # The iterator will get a new batch from the dataset each time a sess.run() is executed on the graph.
        dataset = tf.data.TFRecordDataset(self.dateset_filenames)
        dataset = dataset.map(util_data.decode_image)  # decoding the tfrecord
        dataset = dataset.map(
            self._preProcessData)  # potential local preprocessing of data
        dataset = dataset.shuffle(buffer_size=10000, seed=None)
        dataset = dataset.batch(batch_size=self.batch_size)
        iterator = dataset.make_initializable_iterator()
        inputs = iterator.get_next()

        # The structure of `inputs` depends on self._preProcessData
        [in_image, in_label] = inputs

        # show network architecture
        utils.show_all_variables()

        # define model, loss, optimizer and summaries.
        outputs = self._create_inference(in_image)
        loss = self._create_losses(outputs, in_label)
        optimizer_op = self._create_optimizer(loss)
        summary_op = self._create_summaries(loss)

        with tf.Session() as sess:

            # Initialize all model Variables.
            sess.run(tf.global_variables_initializer())

            # Create Saver object for loading and storing checkpoints
            saver = tf.train.Saver()

            # Create Writer object for storing graph and summaries for TensorBoard
            writer = tf.summary.FileWriter(self.dir_logs, sess.graph)

            # Reload Tensor values from latest checkpoint
            ckpt = tf.train.get_checkpoint_state(self.dir_checkpoints)
            epoch_start = 0
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
                # The checkpoint suffix is the index of the last *completed*
                # epoch (see saver.save below), so resume with the next one.
                epoch_start = int(ckpt_name.split('-')[-1]) + 1

            # Global batch counter used as the TensorBoard step.
            iteration_count = 0
            try:
                # Do training loops
                for epoch_n in range(epoch_start, self.epoch_max):

                    # Initiate or Re-initiate iterator
                    sess.run(iterator.initializer)

                    # Test model output before any training.
                    # NOTE(review): the graph reads directly from the
                    # iterator, so this presumably consumes one batch without
                    # training on it -- confirm against _create_summaries.
                    if epoch_n == 0:
                        summary = sess.run(summary_op)
                        writer.add_summary(summary, global_step=-1)

                    utils.show_message(
                        'Running training epoch no: {0}'.format(epoch_n))
                    while True:
                        try:
                            _, summary = sess.run([optimizer_op, summary_op])
                        except tf.errors.OutOfRangeError:
                            # Iterator exhausted: the epoch is finished.
                            break

                        writer.add_summary(summary,
                                           global_step=iteration_count)
                        iteration_count += 1

                    # Save a checkpoint after every epoch, tagged with the
                    # index of the epoch that just finished.
                    saver.save(sess,
                               os.path.join(self.dir_checkpoints,
                                            self.model + '.model'),
                               global_step=epoch_n)
            finally:
                # Flush pending events and release the event file.
                writer.close()
Esempio n. 2
0
    def train(self, hparams_string):
        """Run adversarial training of the network.

        Parses the training hyperparameters, builds a ``tf.data`` pipeline
        from ``self.dateset_filenames`` and trains the discriminator and the
        generator alternately: for each generator update the discriminator is
        updated ``d_iter`` times, each time on a freshly drawn batch. After
        every epoch a fixed set of test inputs is pushed through the model and
        the resulting image summary is written to TensorBoard and to
        ``<dir_results>/Training``. Checkpoints are stored every
        ``backup_frequency`` epochs, and training resumes after the latest
        checkpoint found in ``self.dir_checkpoints``.

        Args:
            hparams_string: Hyperparameter string handed to
                ``hparams_parser_train``. The parsed result must provide
                ``batch_size``, ``epoch_max``, ``unstructured_noise_dim``,
                ``lr_discriminator``, ``lr_generator``, ``d_iter``,
                ``n_testsamples``, ``class_scale_d``, ``class_scale_g`` and
                ``backup_frequency``.

        Returns:
            None.
        """
        args_train = hparams_parser_train(hparams_string)

        self.batch_size = args_train.batch_size
        self.epoch_max = args_train.epoch_max
        self.unstructured_noise_dim = args_train.unstructured_noise_dim

        self.d_learning_rate = args_train.lr_discriminator
        self.g_learning_rate = args_train.lr_generator

        self.d_iter = args_train.d_iter
        self.n_testsamples = args_train.n_testsamples

        self.class_scale_d = args_train.class_scale_d
        self.class_scale_g = args_train.class_scale_g

        self.backup_frequency = args_train.backup_frequency

        utils.save_model_configuration(args_train, self.dir_base)

        # Use dataset for loading in datasamples from .tfrecord (https://www.tensorflow.org/programmers_guide/datasets#consuming_tfrecord_data)
        # The iterator will get a new batch from the dataset each time a sess.run() is executed on the graph.
        dataset = tf.data.TFRecordDataset(self.dateset_filenames)
        dataset = dataset.map(util_data.decode_image)      # decoding the tfrecord
        dataset = dataset.map(self._genLatentCodes)
        dataset = dataset.shuffle(buffer_size=10000, seed=None)
        dataset = dataset.batch(batch_size=self.batch_size)
        iterator = dataset.make_initializable_iterator()
        input_getBatch = iterator.get_next()

        # Create input placeholders
        input_images = tf.placeholder(
            dtype=tf.float32,
            shape=[None] + self.image_dims,
            name='input_images')
        input_lbls = tf.placeholder(
            dtype=tf.float32,
            shape=[None, self.lbls_dim],
            name='input_lbls')
        input_unstructured_noise = tf.placeholder(
            dtype=tf.float32,
            shape=[None, self.unstructured_noise_dim],
            name='input_unstructured_noise')
        input_test_lbls = tf.placeholder(
            dtype=tf.float32,
            shape=[self.n_testsamples * self.lbls_dim, self.lbls_dim],
            name='input_test_lbls')
        input_test_noise = tf.placeholder(
            dtype=tf.float32,
            shape=[self.n_testsamples * self.lbls_dim, self.unstructured_noise_dim],
            name='input_test_noise')

        # Define model, loss, optimizer and summaries.
        logits_source, logits_class, _ = self._create_inference(input_images, input_lbls, input_unstructured_noise)
        loss_discriminator, loss_generator = self._create_losses(logits_source, logits_class, input_lbls)
        train_op_discriminator, train_op_generator = self._create_optimizer(loss_discriminator, loss_generator)
        summary_op_dloss, summary_op_gloss, summary_op_img, summary_img = self._create_summaries(loss_discriminator, loss_generator, input_test_noise, input_test_lbls)

        # show network architecture
        utils.show_all_variables()

        # create constant test variable to inspect changes in the model
        test_noise, test_lbls = self._genTestInput(self.lbls_dim, n_samples=self.n_testsamples)
        test_feed = {input_test_noise: test_noise,
                     input_test_lbls: test_lbls}

        dir_results_train = os.path.join(self.dir_results, 'Training')
        utils.checkfolder(dir_results_train)

        with tf.Session() as sess:
            # Initialize all model Variables.
            sess.run(tf.global_variables_initializer())

            # Create Saver object for loading and storing checkpoints
            saver = tf.train.Saver()

            # Create Writer object for storing graph and summaries for TensorBoard
            writer = tf.summary.FileWriter(self.dir_logs, sess.graph)

            # Reload Tensor values from latest checkpoint
            ckpt = tf.train.get_checkpoint_state(self.dir_checkpoints)
            epoch_start = 0
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
                # The suffix is the last completed epoch; continue after it.
                epoch_start = int(ckpt_name.split('-')[-1]) + 1

            # Global generator-step counter used as the TensorBoard step.
            interationCnt = 0
            try:
                for epoch_n in range(epoch_start, self.epoch_max):

                    # Test model output before any training
                    if epoch_n == 0:
                        summaryImg_tb, summaryImg = sess.run(
                            [summary_op_img, summary_img],
                            feed_dict=test_feed)

                        writer.add_summary(summaryImg_tb, global_step=-1)
                        utils.save_image_local(summaryImg, dir_results_train, 'Epoch_' + str(-1))

                    # Initiate or Re-initiate iterator
                    sess.run(iterator.initializer)

                    ### ----------------------------------------------------------
                    ### Update model
                    print(datetime.datetime.now(),'- Running training epoch no:', epoch_n)
                    while True:
                        try:
                            # Discriminator: d_iter updates, each on a new batch.
                            for _ in range(self.d_iter):
                                image_batch, lbl_batch, unst_noise_batch = sess.run(input_getBatch)
                                train_feed = {
                                    input_images:             image_batch,
                                    input_lbls:               lbl_batch,
                                    input_unstructured_noise: unst_noise_batch}

                                # Two fetches -> two results. (Unpacking into
                                # three names raised ValueError here before.)
                                _, summary_dloss = sess.run(
                                    [train_op_discriminator, summary_op_dloss],
                                    feed_dict=train_feed)

                            # Only the last discriminator update is logged.
                            writer.add_summary(summary_dloss, global_step=interationCnt)

                            # Generator: one update on the last drawn batch.
                            _, summary_gloss = sess.run(
                                [train_op_generator, summary_op_gloss],
                                feed_dict=train_feed)

                            writer.add_summary(summary_gloss, global_step=interationCnt)
                            interationCnt += 1

                        except tf.errors.OutOfRangeError:
                            # Iterator exhausted: test current model on the
                            # fixed test inputs and finish the epoch.
                            summaryImg_tb, summaryImg = sess.run(
                                [summary_op_img, summary_img],
                                feed_dict=test_feed)

                            writer.add_summary(summaryImg_tb, global_step=epoch_n)
                            utils.save_image_local(summaryImg, dir_results_train, 'Epoch_' + str(epoch_n))

                            break

                    # Save model variables to checkpoint
                    if (epoch_n + 1) % self.backup_frequency == 0:
                        saver.save(sess, os.path.join(self.dir_checkpoints, self.model + '.model'), global_step=epoch_n)
            finally:
                # Flush pending events and release the event file.
                writer.close()
Esempio n. 3
0
    def train(self, hparams_string):
        """Run training of the network.

        Builds a ``tf.data`` input pipeline from ``self.dateset_filenames``,
        creates the model, loss, optimizer and summary ops and trains until
        ``epoch_max`` is reached. When ``use_imagenet`` is set, the model
        variables (except the last six, i.e. the fc layers) are first
        initialised from the VGG16/VGG19 ImageNet checkpoint, which is
        downloaded into ``self.dir_checkpoints`` if it is not present. A
        summary is written for every training batch and a checkpoint is saved
        after every epoch. If a training checkpoint exists, it is restored and
        training resumes at the epoch following the one in its name.

        Args:
            hparams_string: Hyperparameter string handed to
                ``hparams_parser_train``. The parsed result must provide
                ``batch_size``, ``epoch_max``, ``use_imagenet`` and
                ``model_version``.

        Returns:
            None.

        Raises:
            ValueError: If ``use_imagenet`` is set and ``model_version`` is
                neither ``'VGG16'`` nor ``'VGG19'``.
        """

        args_train = hparams_parser_train(hparams_string)
        self.batch_size = args_train.batch_size
        self.epoch_max = args_train.epoch_max
        self.use_imagenet = args_train.use_imagenet
        self.model_version = args_train.model_version

        utils.save_model_configuration(args_train, self.dir_base)

        # Use dataset for loading in datasamples from .tfrecord (https://www.tensorflow.org/programmers_guide/datasets#consuming_tfrecord_data)
        # The iterator will get a new batch from the dataset each time a sess.run() is executed on the graph.
        dataset = tf.data.TFRecordDataset(self.dateset_filenames)
        dataset = dataset.map(util_data.decode_image)  # decoding the tfrecord
        dataset = dataset.map(
            self._preProcessData)  # potential local preprocessing of data
        dataset = dataset.shuffle(buffer_size=10000, seed=None)
        dataset = dataset.batch(batch_size=self.batch_size)
        iterator = dataset.make_initializable_iterator()
        input_getBatch = iterator.get_next()

        input_images = tf.placeholder(dtype=tf.float32,
                                      shape=[None] + self.image_dims,
                                      name='input_images')
        input_lbls = tf.placeholder(dtype=tf.float32,
                                    shape=[None, self.lbls_dim],
                                    name='input_lbls')

        # define model, loss, optimizer and summaries.
        output_logits = self._create_inference(input_images)
        loss = self._create_losses(output_logits, input_lbls)
        optimizer_op = self._create_optimizer(loss)
        summary_op = self._create_summaries(loss)

        # show network architecture
        utils.show_all_variables()

        # Optional initialisation from ImageNet weights.
        init_fn = None
        if self.use_imagenet:
            # model_version -> (variable scope, checkpoint file, download url)
            imagenet_models = {
                'VGG16': ('vgg_16', 'vgg_16.ckpt',
                          "http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz"),
                'VGG19': ('vgg_19', 'vgg_19.ckpt',
                          "http://download.tensorflow.org/models/vgg_19_2016_08_28.tar.gz"),
            }
            if self.model_version not in imagenet_models:
                # Previously this fell through and failed later with an
                # unbound `init_fn`; fail early with an explicit message.
                raise ValueError(
                    'No ImageNet checkpoint available for model_version = '
                    + str(self.model_version))

            scope_name, ckpt_file, url_imagenet_model = imagenet_models[
                self.model_version]
            path_imagenet_ckpt = os.path.join(self.dir_checkpoints, ckpt_file)
            if not tf.gfile.Exists(path_imagenet_ckpt):
                utils.download_and_uncompress_tarball(url_imagenet_model,
                                                      self.dir_checkpoints)

            variables_to_restore = slim.get_model_variables(scope_name)
            variables_to_restore = variables_to_restore[:-6]  # ignore fc layers
            init_fn = slim.assign_from_checkpoint_fn(path_imagenet_ckpt,
                                                     variables_to_restore)

        with tf.Session() as sess:

            # Initialize all model Variables.
            sess.run(tf.global_variables_initializer())

            if init_fn is not None:
                init_fn(sess)

            # Create Saver object for loading and storing checkpoints
            saver = tf.train.Saver()

            # Create Writer object for storing graph and summaries for TensorBoard
            writer = tf.summary.FileWriter(self.dir_logs, sess.graph)

            # Reload Tensor values from latest checkpoint. This happens after
            # the ImageNet initialisation, so a training checkpoint wins.
            ckpt = tf.train.get_checkpoint_state(self.dir_checkpoints)
            epoch_start = 0
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
                # The checkpoint suffix is the index of the last *completed*
                # epoch (see saver.save below), so resume with the next one.
                epoch_start = int(ckpt_name.split('-')[-1]) + 1

            # Global batch counter used as the TensorBoard step.
            iteration_count = 0
            try:
                # Do training loops
                for epoch_n in range(epoch_start, self.epoch_max):

                    # Initiate or Re-initiate iterator
                    sess.run(iterator.initializer)

                    utils.show_message(
                        'Running training epoch no: {0}'.format(epoch_n))
                    while True:
                        try:
                            image_batch, lbl_batch = sess.run(input_getBatch)
                        except tf.errors.OutOfRangeError:
                            # Iterator exhausted: the epoch is finished.
                            break

                        _, summary_loss = sess.run(
                            [optimizer_op, summary_op],
                            feed_dict={
                                input_images: image_batch,
                                input_lbls: lbl_batch
                            })

                        writer.add_summary(summary_loss,
                                           global_step=iteration_count)
                        iteration_count += 1

                    # Save a checkpoint after every epoch, tagged with the
                    # index of the epoch that just finished.
                    saver.save(sess,
                               os.path.join(self.dir_checkpoints,
                                            self.model + '.model'),
                               global_step=epoch_n)
            finally:
                # Flush pending events and release the event file.
                writer.close()
Esempio n. 4
0
    def train(self, hparams_string, preprocessing_params='', preprocessing_eval_params=''):
        """ Run training of the network
        Args:
    
        Returns:
        """

        args_train = hparams_parser_train(hparams_string)
        self.batch_size = args_train.batch_size
        self.epoch_max = args_train.epoch_max 
        self.model_version = args_train.model_version
        pretrained_model_path = args_train.pretrained_model
        use_pretrained_model = False if pretrained_model_path is '' else True
        pretrain_exclude_input = args_train.pretrain_exclude_input
        pretrain_exclude_output = args_train.pretrain_exclude_output
        optim_vars = args_train.optim_vars
        args_train.preprocessing = preprocessing_params
        args_train.preprocessing_eval = preprocessing_eval_params


        print('Training parameters:')
        print(args_train)

        utils.save_model_configuration(args_train, self.dir_base)
        
        # Load dataset
        if (self.dataset == 'PSD_Segmented'):
            DS = DS_PSDs.Dataset()
        elif (self.dataset == 'seeds_all'):
            DS = DS_Seeds.Dataset()
        elif (self.dataset == 'barley'):
            DS = DS_Barley.Dataset()
        elif (self.dataset == 'barley_abnormal'):
            DS = DS_Barley_Abnormal.Dataset()
        elif (self.dataset == 'barley_d0'):
            DS = DS_Barley_D0.Dataset()
        elif (self.dataset == 'barley_next'):
            DS = DS_Barley_Next.Dataset()
        elif (self.dataset == 'barley_next_stratified'):
            DS = DS_Barley_Next_Stratified.Dataset()
        elif (self.dataset == 'okra'):
            DS = DS_Okra.Dataset()
        elif (self.dataset == 'okra_abnormal'):
            DS = DS_Okra_Abnormal.Dataset()
        elif (self.dataset == 'okra_next'):
            DS = DS_Okra_next.Dataset()
        elif (self.dataset == 'okra_d0'):
            DS = DS_Okra_D0.Dataset()
        tf_dataset_list, dataset_sizes = DS.get_dataset_list(data_source = args_train.data_source,
                                                            data_folder = args_train.data_folder,
                                                            shuffle_before_split=args_train.shuffle_before_split,
                                                            shuffle_seed=args_train.shuffle_seed,
                                                            group_before_split=args_train.group_before_split,
                                                            validation_method=args_train.validation_method,
                                                            holdout_split=args_train.holdout_split,
                                                            cross_folds=10,
                                                            cross_val_fold=None,
                                                            cross_test_fold=0,
                                                            shard_val=args_train.shard_val,
                                                            shard_test=args_train.shard_test,
                                                            stratify_training_set=args_train.stratify_training_set)

        with tf.Session('') as tf_session:
            DS.save_dataset_filenames(os.path.join(self.dir_logs, 'filenames_training.txt'),tf_dataset_list[0], tf_session)
            DS.save_dataset_filenames(os.path.join(self.dir_logs, 'filenames_validation.txt'),tf_dataset_list[1], tf_session)
            DS.save_dataset_filenames(os.path.join(self.dir_logs, 'filenames_test.txt'),tf_dataset_list[2], tf_session)

        class_dicts = DS.get_class_dicts()
        num_classes = [len(class_dict) for class_dict in class_dicts]

        preprocessing = preprocess_factory.preprocess_factory()
        if not (preprocessing_params == ''):
            # Setup preprocessing pipeline
            preprocessing.prep_pipe_from_string(preprocessing_params)

        with tf.name_scope('Training_dataset'):
            tf_dataset_train = tf_dataset_list[0]
            tf_dataset_train = tf_dataset_train.shuffle(buffer_size = 10000, seed = None)
            tf_dataset_train = tf_dataset_train.map(DS._decode_from_TFexample)
            tf_dataset_train = tf_dataset_train.map(preprocessing.pipe)
            tf_dataset_train = tf_dataset_train.batch(batch_size = self.batch_size, drop_remainder=False)
            tf_dataset_train = tf_dataset_train.repeat(count=-1) # -1 --> repeat indefinitely
            # tf_dataset_train = tf_dataset_train.prefetch(buffer_size=3)
            tf_dataset_train_iterator = tf_dataset_train.make_one_shot_iterator()
            input_getBatch = tf_dataset_train_iterator.get_next()

        # Setup preprocessing pipeline
        preprocessing_eval = preprocess_factory.preprocess_factory()
        if not (preprocessing_eval_params == ''):
            preprocessing_eval.prep_pipe_from_string(preprocessing_eval_params)
        elif not (preprocessing_params ==''): # Use same preprocessing as training step, if it is not specified for validation step
            preprocessing_eval.prep_pipe_from_string(preprocessing_params)
        else:
            pass # If no preprocessing is specified, dont to any preprocessing

        with tf.name_scope('Validation_dataset'):
            tf_dataset_val = tf_dataset_list[1]
            if (tf_dataset_val is not None):
                tf_dataset_val = tf_dataset_val.map(DS._decode_from_TFexample)
                tf_dataset_val = tf_dataset_val.map(preprocessing_eval.pipe)
                tf_dataset_val = tf_dataset_val.batch(batch_size = self.batch_size, drop_remainder=False)
                tf_dataset_val = tf_dataset_val.repeat(count=-1) # -1 --> repeat indefinitely
                # tf_dataset_val = tf_dataset_val.prefetch(buffer_size=3)
                tf_dataset_val_iterator = tf_dataset_val.make_one_shot_iterator()
                tf_input_getBatch_val = tf_dataset_val_iterator.get_next()

        # Define input and output layers
        input_images = tf.placeholder(
            dtype = tf.float32, 
            shape = [None] + self.image_dims, 
            name = 'input_images')
        input_lbls = []
        for i, N_classes in enumerate(num_classes):
            input_lbls.append(
                                tf.placeholder(
                                    dtype = tf.uint8,   
                                    shape = [None, 1], # shape = [None, N_classes],
                                    name = 'input_lbls' + str(i)
                                )
                            )
        tf_is_training = tf.placeholder(
            dtype = tf.bool,
            shape = (),
            name = 'is_training_flag'
        )
        # define model model and load pre-trained model
        output_logits, endpoints, input_layer_name, output_layer_names = self._create_inference(input_images, is_training=tf_is_training, num_classes=num_classes, global_pool=args_train.global_pool)
        if (use_pretrained_model):
            exclude_layers = []
            if (pretrain_exclude_input):
                exclude_layers += input_layer_name
            if (pretrain_exclude_output):
                exclude_layers += output_layer_names
            output_logits, model_vars_restored, model_vars_not_restored = self._load_pretrained_model(output_logits, pretrained_model_path, exclude_layers) #['resnet_v1_50/conv1','resnet_v1_50/logits']) #['resnet_v1_50/conv1','resnet_v1_50/logits'])
        else:
            model_vars_restored = []
            model_vars_not_restored = [value for key,value in endpoints.items()]
        
        # Setup loss function
        loss = self._create_losses(output_logits, input_lbls, num_classes)

        # Setup optimizer
        variables_to_optimize = None
        if (optim_vars == 'all'):
            variables_to_optimize = None
        elif (optim_vars == 'non_restored'):
            variables_to_optimize = model_vars_not_restored
        else:
            raise NotImplementedError('Value set for optim_vars not implemented. Value = ' + optim_vars)
        
        optimizer_op = self._create_optimizer(loss, variables_to_optimize=variables_to_optimize, learning_rate=args_train.learning_rate)
        
        # Setup summaries
        CMatsTrain = [CM.confusionmatrix(N_classes) for N_classes in num_classes]
        CMatsVal = [CM.confusionmatrix(N_classes) for N_classes in num_classes]
        tf_loss = tf.placeholder(tf.float32, name='loss_mean')
        tf_accuracies = []
        tf_recalls = []
        tf_precisions = []
        tf_F1s = []
        tf_cs_categories = []
        for i, N_classes in enumerate(num_classes):
            tf_accuracies.append(tf.placeholder(dtype = tf.float32, name = 'Overview/Accuracy' + str(i)) )
            with tf.name_scope('output_' + str(i)):
                tf_recall, tf_chart_recall = tf_custom_summaries.class_score_mmm('Recall')
                tf_recalls.append(tf_recall)
                tf_precision, tf_chart_precision = tf_custom_summaries.class_score_mmm('Precision')
                tf_precisions.append(tf_precision)
                tf_F1, tf_chart_F1 = tf_custom_summaries.class_score_mmm('F1')
                tf_F1s.append(tf_F1)
            tf_cs_categories.append(
                                tf_custom_summaries.layout_pb2.Category(
                                    title='output' + str(i),
                                    chart=[tf_chart_F1, tf_chart_precision, tf_chart_recall]
                                )
                            )
        summary_list = tf_accuracies
        summary_dict = {'Overview/loss':         tf_loss}

        layout_summary = tf_custom_summaries.summary_lib.custom_scalar_pb(
                                tf_custom_summaries.layout_pb2.Layout(
                                    category=tf_cs_categories
                                    )
                            )
        self._create_summaries(loss, summary_dict=summary_dict, summary_list=summary_list)
        tf_summary_op = tf.summary.merge_all()
        
        # show network architecture
        # utils.show_all_variables()
        
        gpu_options = tf.GPUOptions(allow_growth=True)
        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            
            # Initialize all model Variables.
            sess.run(tf.global_variables_initializer())
            
            # Create Saver object for loading and storing checkpoints
            saver = tf.train.Saver()
            
            # Create Writer object for storing graph and summaries for TensorBoard
            writer_train = tf.summary.FileWriter(os.path.join(self.dir_logs,'train'), sess.graph)
            writer_validation = tf.summary.FileWriter(os.path.join(self.dir_logs,'val'), sess.graph)
            writer_train.add_summary(layout_summary)
            writer_validation.add_summary(layout_summary)
            
            # Reload Tensor values from latest checkpoint
            ckpt = tf.train.get_checkpoint_state(self.dir_checkpoints)
            epoch_start = 0
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
                epoch_start = int(ckpt_name.split('-')[-1])
            
            # Do training loops
            for epoch_n in range(epoch_start, self.epoch_max):
                
                #################
                # Training step #
                #################
                utils.show_message('Running training epoch no: {0}'.format(epoch_n), lvl=1)
                # Reset confusion matrices and accumulated loss
                for CMat in CMatsTrain:
                    CMat.Reset()
                loss_train = 0
                 # Loop through all batches of examples
                for batchCounter in range(math.ceil(float(dataset_sizes[0])/float(self.batch_size))):
                    # Grab an image and label batch from the validation set
                    image_batch, lbl_batch, *args = sess.run(input_getBatch)
                    # Built feed dict based on list of labels
                    feed_dict = {input_lbl: np.expand_dims(lbl_batch[:,i],1) for i,input_lbl in enumerate(input_lbls)}
                    feed_dict.update({input_images:    image_batch})
                    feed_dict.update({tf_is_training: True})

                    # Perform training step
                    _, loss_out, lbl_batch_predict = sess.run(
                        [optimizer_op, loss, output_logits],
                        feed_dict=feed_dict)
                    loss_train += loss_out
                    # Store results from training step
                    # Calculate confusion matrix for all outputs
                    for i,CMat in enumerate(CMatsTrain):
                        lbl_idx = lbl_batch[:,i]
                        lbl_idx_predict = np.squeeze(np.argmax(lbl_batch_predict[i], axis=3))
                        CMat.Append(lbl_idx,lbl_idx_predict)
                    # Show progress in stdout
                    self._show_progress('TR', epoch_n, batchCounter, math.ceil(float(dataset_sizes[0])/float(self.batch_size))-1, loss_out, CMatsTrain)

                # Print accumulated confusion matricx for each output
                print('\n')
                for i, CMat in enumerate(CMatsTrain):
                    CMat.Save(os.path.join(self.dir_logs, 'ConfMat_Train_output' + '{:02d}'.format(i) + '.csv'),'csv')
                    print(CMat)
                
                # Create fill in summaries for training log
                feed_dict_summary = {tf_acc: CMat.accuracy() for tf_acc, CMat in zip(tf_accuracies,CMatsTrain)}
                feed_dict_summary.update({tf_rec: [0 if np.isnan(x) else x for x in CMat.recall()] for tf_rec, CMat in zip(tf_recalls,CMatsTrain)})
                feed_dict_summary.update({tf_pre: [0 if np.isnan(x) else x for x in CMat.precision()] for tf_pre, CMat in zip(tf_precisions,CMatsTrain)})
                feed_dict_summary.update({tf_f1:  [0 if np.isnan(x) else x for x in CMat.fScore(beta=1)] for tf_f1, CMat in zip(tf_F1s,CMatsTrain)})
                loss_train = loss_train/batchCounter
                feed_dict_summary.update({tf_loss: loss_train})
                summaries = sess.run(tf_summary_op, 
                                    feed_dict=feed_dict_summary)
                # Write summaries to training log
                writer_train.add_summary(summaries, global_step=epoch_n)

                ###################
                # Validation step #
                ###################

                if (tf_dataset_val is not None): # Skip validation step, if there is no validation dataset
                    utils.show_message('Running validation epoch no: {0}'.format(epoch_n),lvl=1)
                    # Reset confusion matrices and accumulated loss
                    for CMat in CMatsVal:
                        CMat.Reset()
                    loss_val = 0
                    # Loop through all batches of examples
                    for batchCounter in range(math.ceil(float(dataset_sizes[1])/float(self.batch_size))):
                        # Grab an image and label batch from the validation set
                        image_batch, lbl_batch, *args = sess.run(tf_input_getBatch_val)
                        # Built feed dict based on list of labels
                        feed_dict = {input_lbl: np.expand_dims(lbl_batch[:,i],1) for i,input_lbl in enumerate(input_lbls)}
                        feed_dict.update({input_images:    image_batch})
                        feed_dict.update({tf_is_training: False})

                        # Perform evaluation step
                        lbl_batch_predict, loss_out = sess.run(
                                                            [output_logits, loss],
                                                            feed_dict=feed_dict
                                                        )
                        # Store results from evaluation step
                        # Calculate confusion matrix for all outputs
                        for i,CMat in enumerate(CMatsVal):
                            lbl_idx = lbl_batch[:,i] #np.squeeze(np.argmax(lbl_batch, axis=1))
                            lbl_idx_predict = np.squeeze(np.argmax(lbl_batch_predict[i], axis=3))
                            CMat.Append(lbl_idx,lbl_idx_predict)
                        loss_val += loss_out
                        # Show progress in stdout
                        self._show_progress('VA', epoch_n, batchCounter, math.ceil(float(dataset_sizes[1])/float(self.batch_size))-1, loss_out, CMatsVal)
                    
                    # Print confusion matrix for each output
                    print('\n')
                    for i, CMat in enumerate(CMatsVal):
                        CMat.Save(os.path.join(self.dir_logs, 'ConfMat_Val_output' + '{:02d}'.format(i) + '.csv'),'csv') # Save confusion matrix
                        print(CMat)

                    # Create fill in summaries for validation log
                    feed_dict_summary = {tf_acc: CMat.accuracy() for tf_acc, CMat in zip(tf_accuracies,CMatsVal)}
                    feed_dict_summary.update({tf_rec: [0 if np.isnan(x) else x for x in CMat.recall()] for tf_rec, CMat in zip(tf_recalls,CMatsVal)})
                    feed_dict_summary.update({tf_pre: [0 if np.isnan(x) else x for x in CMat.precision()] for tf_pre, CMat in zip(tf_precisions,CMatsVal)})
                    feed_dict_summary.update({tf_f1:  [0 if np.isnan(x) else x for x in CMat.fScore(beta=1)] for tf_f1, CMat in zip(tf_F1s,CMatsVal)})
                    loss_val = loss_val/batchCounter
                    feed_dict_summary.update({tf_loss: loss_val})
                    summaries = sess.run(tf_summary_op, 
                                        feed_dict=feed_dict_summary)
                    # Write summaries to validation log
                    writer_validation.add_summary(summaries, global_step=epoch_n)
                
                # Save checkpoint for this epoch
                if epoch_n % 1 == 0:
                    saver.save(sess,os.path.join(self.dir_checkpoints, self.model + '.model'), global_step=epoch_n)
Esempio n. 5
0
    def train(self, hparams_string):
        """Run alternating discriminator/generator training.

        The hyper-parameter string is parsed with ``hparams_parser_train`` and
        the parsed values are stored on ``self``. A ``tf.data`` pipeline is
        built from the training shards, the model/loss/optimizer/summary ops
        are created, and training runs from the epoch following the latest
        checkpoint (or from epoch 0) up to ``epoch_max``. After every epoch
        the test inputs are rendered and the resulting images are saved below
        ``<dir_results>/Training/<class index>``; a checkpoint is written
        every ``backup_frequency`` epochs.

        Args:
            hparams_string: String with the training hyper-parameters; it is
                passed unchanged to ``hparams_parser_train``.

        Raises:
            ValueError: If the parsed ``d_iter`` is smaller than 1. The
                generator step reuses the batch fetched by the last
                discriminator step, so at least one such step is required.
        """
        args_train = hparams_parser_train(hparams_string)

        self.batch_size = args_train.batch_size
        self.epoch_max = args_train.epoch_max

        self.unstructured_noise_dim = args_train.unstructured_noise_dim
        self.info_var_dim = args_train.info_var_dim
        self.n_testsamples = args_train.n_testsamples

        self.d_learning_rate = args_train.lr_discriminator
        self.g_learning_rate = args_train.lr_generator
        self.d_iter = args_train.d_iter

        self.gp_lambda = args_train.gp_lambda
        self.class_scale_d = args_train.class_scale_d
        self.class_scale_g = args_train.class_scale_g

        self.info_scale_d = args_train.info_scale_d
        self.info_scale_g = args_train.info_scale_g

        self.backup_frequency = args_train.backup_frequency

        self.shards_idx_test = args_train.shards_idx_test

        # Fail early with a clear message instead of hitting an unbound
        # local variable in the generator step further down.
        if self.d_iter < 1:
            raise ValueError(
                'd_iter must be at least 1, got {0}'.format(self.d_iter))

        utils.save_model_configuration(args_train, self.dir_base)

        # Create folders for saving training results (one per class)
        dir_results_train = os.path.join(self.dir_results, 'Training')
        utils.checkfolder(dir_results_train)

        class_dirs = [
            os.path.join(dir_results_train, str(class_n).zfill(2))
            for class_n in range(self.lbls_dim)
        ]
        for class_dir in class_dirs:
            utils.checkfolder(class_dir)

        # Select the training shards. If 0 is among the test shard indices
        # every file is used; otherwise the indices are shifted by -1 and the
        # files at those positions are left out of the training set.
        if 0 in self.shards_idx_test:
            dataset_filenames = self.dataset_filenames
        else:
            self.shards_idx_test = np.subtract(self.shards_idx_test, 1)
            shards_idx_training = np.delete(range(len(self.dataset_filenames)),
                                            self.shards_idx_test)
            dataset_filenames = [
                self.dataset_filenames[i] for i in shards_idx_training
            ]

            utils.show_message('Training Data:')
            print(dataset_filenames)

        # Setup preprocessing pipeline
        preprocessing = preprocess_factory.preprocess_factory()

        # Dataset specific preprocessing; 'MNIST' and 'PSD_Nonsegmented' get
        # no additional preprocessing steps.
        if self.dataset == 'PSD_Segmented':
            preprocessing.prep_pipe_from_string(
                "pad_to_size;{'height': 566, 'width': 566, 'constant': -1.0};random_rotation;{};crop_to_size;{'height': 400, 'width': 400};resize;{'height': 128, 'width': 128}"
            )

        # Use dataset for loading in datasamples from .tfrecord (https://www.tensorflow.org/programmers_guide/datasets#consuming_tfrecord_data)
        # The iterator will get a new batch from the dataset each time a sess.run() is executed on the graph.
        dataset = tf.data.TFRecordDataset(dataset_filenames)
        dataset = dataset.shuffle(buffer_size=10000, seed=None)
        dataset = dataset.map(util_data.decode_image)  # decoding the tfrecord
        dataset = dataset.map(
            self._genLatentCodes)  # preprocess data and perform augmentation
        dataset = dataset.map(preprocessing.pipe)
        dataset = dataset.batch(batch_size=self.batch_size)
        iterator = dataset.make_initializable_iterator()
        input_getBatch = iterator.get_next()

        # Number of rows in every test input fed to the summary image op
        n_test_rows = self.n_testsamples**np.minimum(2, self.info_var_dim)

        # Create input placeholders
        input_images = tf.placeholder(dtype=tf.float32,
                                      shape=[self.batch_size] +
                                      self.image_dims,
                                      name='input_images')
        input_lbls = tf.placeholder(dtype=tf.float32,
                                    shape=[None, self.lbls_dim],
                                    name='input_lbls')
        input_unstructured_noise = tf.placeholder(
            dtype=tf.float32,
            shape=[None, self.unstructured_noise_dim],
            name='input_unstructured_noise')
        input_info_noise = tf.placeholder(dtype=tf.float32,
                                          shape=[None, self.info_var_dim],
                                          name='input_info_noise')
        input_test_lbls = tf.placeholder(dtype=tf.float32,
                                         shape=[n_test_rows, self.lbls_dim],
                                         name='input_test_lbls')
        input_test_noise = tf.placeholder(
            dtype=tf.float32,
            shape=[n_test_rows, self.unstructured_noise_dim],
            name='input_test_noise')
        input_test_info_noise = tf.placeholder(
            dtype=tf.float32,
            shape=[n_test_rows, self.info_var_dim],
            name='input_test_info_noise')

        # Define model, loss, optimizer and summaries.
        logits_source, logits_class, logits_info, artificial_images = self._create_inference(
            input_images, input_lbls, input_unstructured_noise,
            input_info_noise)
        loss_discriminator, loss_generator = self._create_losses(
            logits_source, logits_class, logits_info, artificial_images,
            input_lbls, input_info_noise)
        train_op_discriminator, train_op_generator = self._create_optimizer(
            loss_discriminator, loss_generator)
        summary_op_dloss, summary_op_gloss, summary_op_img, summary_img = self._create_summaries(
            loss_discriminator, loss_generator, input_test_noise,
            input_test_lbls, input_test_info_noise)

        # show network architecture
        utils.show_all_variables()

        # create constant test variable to inspect changes in the model
        self.combinations_info_var = list(
            itertools.combinations(range(self.info_var_dim), 2))

        test_noise, test_info = self._genTestInput()

        def _save_test_samples(sess, epoch_n):
            """Evaluate the test inputs for every class and save the images."""
            for class_n, class_dir in enumerate(class_dirs):
                # One-hot labels selecting the current class for all rows
                test_lbls = np.zeros([n_test_rows, self.lbls_dim])
                test_lbls[:, class_n] = 1

                for i, test_info_combi in enumerate(test_info):
                    # NOTE: the serialized image summary is evaluated but not
                    # written to the event file; only the image is stored.
                    _, summaryImg = sess.run(
                        [summary_op_img, summary_img],
                        feed_dict={
                            input_test_noise: test_noise,
                            input_test_lbls: test_lbls,
                            input_test_info_noise: test_info_combi
                        })

                    if self.info_var_dim < 2:
                        filename_temp = 'Epoch_{0}_LatentVar_1'.format(
                            epoch_n)
                    else:
                        combi = self.combinations_info_var[i]
                        filename_temp = 'Epoch_{0}_LatentCombi_{1}_{2}'.format(
                            epoch_n, combi[0], combi[1])

                    utils.save_image_local(summaryImg, class_dir,
                                           filename_temp)

        with tf.Session() as sess:
            # Initialize all model Variables.
            sess.run(tf.global_variables_initializer())

            # Create Saver object for loading and storing checkpoints
            saver = tf.train.Saver(max_to_keep=500)

            # Create Writer object for storing graph and summaries for TensorBoard
            writer = tf.summary.FileWriter(self.dir_logs, sess.graph)

            # Reload Tensor values from latest checkpoint and continue with
            # the epoch following the one encoded in the checkpoint name.
            ckpt = tf.train.get_checkpoint_state(self.dir_checkpoints)
            epoch_start = 0
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
                epoch_start = int(ckpt_name.split('-')[-1]) + 1

            # Global step used for the loss summaries (one per generator step)
            iteration_cnt = 0
            for epoch_n in range(epoch_start, self.epoch_max):

                # Test model output before any training
                if epoch_n == 0:
                    _save_test_samples(sess, epoch_n)

                # Initiate or Re-initiate iterator
                sess.run(iterator.initializer)

                ### ----------------------------------------------------------
                ### Update model
                if (np.mod(epoch_n, 100) == 0) or epoch_n < 25:
                    utils.show_message(
                        'Running training epoch no: {0}'.format(epoch_n))

                while True:
                    try:
                        # d_iter discriminator updates per generator update
                        for _ in range(self.d_iter):
                            (image_batch, lbl_batch, unst_noise_batch,
                             info_noise_batch) = sess.run(input_getBatch)

                            # input_images has a fixed batch dimension, so an
                            # incomplete final batch ends the epoch as well.
                            if image_batch.shape[0] != self.batch_size:
                                raise OutOfRangeError

                            feed_dict = {
                                input_images: image_batch,
                                input_lbls: lbl_batch,
                                input_unstructured_noise: unst_noise_batch,
                                input_info_noise: info_noise_batch
                            }

                            _, summary_dloss = sess.run(
                                [train_op_discriminator, summary_op_dloss],
                                feed_dict=feed_dict)

                        # Only the summary of the last discriminator step is
                        # logged for this iteration.
                        writer.add_summary(summary_dloss,
                                           global_step=iteration_cnt)

                        # The generator step reuses the last fetched batch
                        _, summary_gloss = sess.run(
                            [train_op_generator, summary_op_gloss],
                            feed_dict=feed_dict)

                        writer.add_summary(summary_gloss,
                                           global_step=iteration_cnt)
                        iteration_cnt += 1

                    except (tf.errors.OutOfRangeError, OutOfRangeError):
                        # Dataset exhausted: the epoch is finished
                        break

                # Test current model
                _save_test_samples(sess, epoch_n)

                # Push buffered loss summaries to disk once per epoch so they
                # are not lost if the run is interrupted.
                writer.flush()

                # Save model variables to checkpoint
                if (epoch_n + 1) % self.backup_frequency == 0:
                    saver.save(sess,
                               os.path.join(self.dir_checkpoints,
                                            self.model + '.model'),
                               global_step=epoch_n)