Ejemplo n.º 1
0
def run():
    """ Trainer Runner

    Runs the ALL Detection System 2019 NCS1 Classifier Trainer.

    The function works in four stages, all driven by the values found in
    Trainer.confs["Classifier"]:

    1. Reads the labels file (DatasetDir/Labels, one "<int>:<name>" entry
       per line) into Trainer.labelsToName.
    2. Builds an Inception V3 training graph and trains it for Epochs
       epochs inside a tf.train.Supervisor managed session (LogDir).
    3. Explicitly saves a final checkpoint so the exported model reflects
       the last training step.
    4. Rebuilds an inference graph around a single-image placeholder named
       "input", restores the latest checkpoint, converts the variables to
       constants and writes the serialized graph to ALLGraph.

    Raises:
        ValueError: If no checkpoint can be found in LogDir when exporting.
    """

    humanStart, clockStart = Trainer.Helpers.timerStart()

    Trainer.Helpers.logger.info(
        "ALL Detection System 2019 NCS1 Trainer started.")

    confs = Trainer.confs["Classifier"]

    # Open the labels file. The handle is still published on Trainer.labels
    # as before, but it is now always closed once it has been read.
    Trainer.labels = open(confs["DatasetDir"] + "/" + confs["Labels"], 'r')
    try:
        # Create a dictionary to refer each label to their string name
        for line in Trainer.labels:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing empty line)
                continue
            # Split on the first ':' only so names may contain colons
            label, string_name = line.split(':', 1)
            # Strip only the line terminator; a final line without a newline
            # must not lose its last character
            Trainer.labelsToName[int(label)] = string_name.rstrip('\r\n')
    finally:
        Trainer.labels.close()

    # Create a dictionary that will help people understand your dataset better. This is required by the Dataset class later.
    Trainer.items_to_descriptions = {
        'image': 'A 3-channel RGB coloured  image that is ex: office, people',
        'label': 'A label that ,start from zero'
    }

    # Create the log directory here. Must be done here otherwise import will activate this unneededly.
    # makedirs also creates missing parent directories.
    if not os.path.exists(confs["LogDir"]):
        os.makedirs(confs["LogDir"])

    # Now we start to construct the graph and build our model
    with tf.Graph().as_default() as graph:
        # Set the verbosity to INFO level
        tf.logging.set_verbosity(tf.logging.INFO)

        # First create the dataset and load one batch
        dataset = Trainer.getSplit('train')
        images, _, labels = Trainer.loadBatch(dataset)

        # Know the number steps to take before decaying the learning rate and batches per epoch
        num_batches_per_epoch = dataset.num_samples // confs["BatchSize"]
        # Because one step is one batch processed
        num_steps_per_epoch = num_batches_per_epoch
        decay_steps = int(confs["EpochsBeforeDecay"] * num_steps_per_epoch)

        # Create the model inference
        with slim.arg_scope(inception_v3_arg_scope()):
            logits, end_points = inception_v3(images,
                                              num_classes=dataset.num_classes,
                                              is_training=True)

        # Perform one-hot-encoding of the labels (Try one-hot-encoding within the load_batch function!)
        one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)

        # Build the softmax cross entropy loss. The returned tensor is not
        # needed directly: total_loss below is what gets optimised.
        tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                        logits=logits)
        # obtain the regularization losses as well
        total_loss = tf.losses.get_total_loss()

        # Create the global step for monitoring the learning_rate and training.
        global_step = get_or_create_global_step()

        # Define your exponentially decaying learning rate
        lr = tf.train.exponential_decay(
            learning_rate=confs["LearningRate"],
            global_step=global_step,
            decay_steps=decay_steps,
            decay_rate=confs["LearningRateDecay"],
            staircase=True)

        # Now we can define the optimizer that takes on the learning rate
        optimizer = tf.train.AdamOptimizer(learning_rate=lr)

        # Create the train_op.
        train_op = slim.learning.create_train_op(total_loss, optimizer)

        # State the metrics that you want to predict. We get a predictions that is not one_hot_encoded.
        predictions = tf.argmax(end_points['Predictions'], 1)
        probabilities = end_points['Predictions']
        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(
            predictions, labels)
        metrics_op = tf.group(accuracy_update, probabilities)

        # Now finally create all the summaries you need to monitor and group them into one summary op.
        tf.summary.scalar('losses/Total_Loss', total_loss)
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.scalar('learning_rate', lr)
        my_summary_op = tf.summary.merge_all()

        # Now we need to create a training step function that runs both the train_op, metrics_op and updates the global_step concurrently.
        def train_step(sess, train_op, global_step, epochCount):
            '''
            Runs train_op, global_step and metrics_op in a single session
            call, logs the loss and the time taken for the step, and returns
            (loss value, global step count).
            '''
            # Check the time for each sess run
            start_time = time.time()
            step_loss, global_step_count, _ = sess.run(
                [train_op, global_step, metrics_op])
            time_elapsed = time.time() - start_time

            # Run the logging to print some results
            logging.info(' Epch %.2f Glb Stp %s: Loss: %.4f (%.2f sec/step)',
                         epochCount, global_step_count, step_loss,
                         time_elapsed)

            return step_loss, global_step_count

        # Define your supervisor for running a managed session. Do not run the summary_op automatically or else it will consume too much memory
        sv = tf.train.Supervisor(logdir=confs["LogDir"], summary_op=None)

        # Run the managed session
        with sv.managed_session() as sess:
            # Kept separate from the loss tensor so the two are never confused;
            # stays None when no training step is executed.
            final_loss = None

            for step in range(num_steps_per_epoch * confs["Epochs"]):
                # At the start of every epoch, show the vital information:
                if step % num_batches_per_epoch == 0:
                    # Integer division so the header reads "Epoch 1/N", not "Epoch 1.0/N"
                    logging.info('Epoch %s/%s',
                                 step // num_batches_per_epoch + 1,
                                 confs["Epochs"])
                    learning_rate_value, accuracy_value = sess.run(
                        [lr, accuracy])
                    logging.info('Current Learning Rate: %s',
                                 learning_rate_value)
                    logging.info('Current Streaming Accuracy: %s',
                                 accuracy_value)

                    # optionally, print your logits and predictions for a sanity check that things are going fine.
                    logits_value, probabilities_value, predictions_value, labels_value = sess.run(
                        [logits, probabilities, predictions, labels])
                    print('logits: \n', logits_value[:5])
                    print('Probabilities: \n', probabilities_value[:5])
                    print('predictions: \n', predictions_value[:100])
                    print('Labels:\n:', labels_value[:100])

                # Every step trains; the fractional epoch is passed for the
                # per-step log line.
                final_loss, _ = train_step(sess, train_op, sv.global_step,
                                           step / num_batches_per_epoch + 1)

                # Log the summaries every 10 step.
                if step % 10 == 0:
                    summaries = sess.run(my_summary_op)
                    sv.summary_computed(sess, summaries)

            # We log the final training loss and accuracy
            logging.info('Final Loss: %s', final_loss)
            logging.info('Final Accuracy: %s', sess.run(accuracy))

            # Once all the training has been done, save the log files and checkpoint model
            logging.info('Finished training! Saving model to disk now.')
            # Write the final checkpoint explicitly so that the export below
            # restores the weights of the last training step.
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)

    checkpoint_file = tf.train.latest_checkpoint(confs["LogDir"])
    if checkpoint_file is None:
        # saver.restore would fail on a missing path anyway; fail early with
        # a message that names the directory.
        raise ValueError("No checkpoint found in " + str(confs["LogDir"]) +
                         ", unable to export the graph.")

    with tf.Graph().as_default() as graph:

        # Inference input: a single ImageSize x ImageSize, 3-channel image
        images = tf.placeholder(
            "float", [1, confs["ImageSize"], confs["ImageSize"], 3],
            name="input")

        with slim.arg_scope(inception_v3_arg_scope()):
            logits, end_points = inception_v3(
                images,
                num_classes=confs["NumClasses"],
                is_training=False)
        probabilities = tf.nn.softmax(logits)
        saver = tf.train.Saver(slim.get_variables_to_restore())

        # Setup graph def
        input_graph_def = graph.as_graph_def()
        output_node_names = confs["OutputNode"]
        output_graph_name = confs["ALLGraph"]

        with tf.Session() as sess:
            saver.restore(sess, checkpoint_file)

            # Exporting the graph
            print("Exporting graph...")
            # OutputNode is a comma separated list of node names
            output_graph_def = graph_util.convert_variables_to_constants(
                sess, input_graph_def, output_node_names.split(","))

            with tf.gfile.GFile(output_graph_name, "wb") as f:
                f.write(output_graph_def.SerializeToString())

    clockEnd, difference, humanEnd = Trainer.Helpers.timerEnd(clockStart)

    Trainer.Helpers.logger.info(
        "ALL Detection System 2019 NCS1 Trainer ended in " + str(difference))
Ejemplo n.º 2
0
def run():
    """ Evaluation Runner

    Evaluates the trained classifier on the 'validation' split.

    Builds an Inception V3 graph with is_training=False, restores the
    weights from Eval.checkpoint_file through the Supervisor init function,
    then runs EpochsTest epochs of evaluation batches while tracking the
    streaming accuracy. Summaries are written to LogDirEval every 10 steps
    and a checkpoint of the evaluation session is saved at the end.

    All settings are read from Eval.confs["Classifier"].
    """

    confs = Eval.confs["Classifier"]

    # Create LogDirEval for evaluation information (makedirs also creates
    # missing parent directories)
    if not os.path.exists(confs["LogDirEval"]):
        os.makedirs(confs["LogDirEval"])

    # Just construct the graph from scratch again
    with tf.Graph().as_default() as graph:

        tf.logging.set_verbosity(tf.logging.INFO)

        # Get the dataset first and load one batch of validation images and labels tensors. Set is_training as False so as to use the evaluation preprocessing
        dataset = Eval.getSplit('validation')
        images, raw_images, labels = Eval.loadBatch(dataset, is_training=False)

        # Create some information about the evaluation steps. Integer
        # division keeps this a whole number of batches, so the
        # "start of epoch" modulo test below is exact.
        num_batches_per_epoch = dataset.num_samples // confs["BatchTestSize"]
        num_steps_per_epoch = num_batches_per_epoch

        # Now create the inference model but set is_training=False
        with slim.arg_scope(inception_v3_arg_scope()):
            logits, end_points = inception_v3(
                images, num_classes=dataset.num_classes, is_training=False)

        # Perform one-hot-encoding of the labels (Try one-hot-encoding within the load_batch function!)
        one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)

        # Build the softmax cross entropy loss; only total_loss is used below.
        tf.losses.softmax_cross_entropy(
            onehot_labels=one_hot_labels, logits=logits)
        # obtain the regularization losses as well
        total_loss = tf.losses.get_total_loss()

        # Get all the variables to restore from the checkpoint file and create the saver function to restore
        variables_to_restore = slim.get_variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        def restore_fn(sess):
            '''
            Restores the model variables from Eval.checkpoint_file.
            '''
            return saver.restore(sess, Eval.checkpoint_file)

        # Just define the metrics to track without the loss or whatsoever
        probabilities = end_points['Predictions']
        predictions = tf.argmax(probabilities, 1)

        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(
            predictions, labels)
        metrics_op = tf.group(accuracy_update)

        # Create the global step and an increment op for monitoring
        global_step = get_or_create_global_step()
        # no apply_gradient method so manually increasing the global_step
        global_step_op = tf.assign(global_step, global_step + 1)

        # Create a evaluation step function
        def eval_step(sess, metrics_op, global_step):
            '''
            Runs the metrics op together with the global step increment,
            logs the streaming accuracy and the time taken, and returns the
            accuracy value.

            The global_step argument is accepted for call compatibility; the
            step counter is advanced through global_step_op from the
            enclosing scope.
            '''
            start_time = time.time()
            _, global_step_count, accuracy_value = sess.run(
                [metrics_op, global_step_op, accuracy])
            time_elapsed = time.time() - start_time

            # Log some information
            logging.info('Global Step %s: Streaming Accuracy: %.4f (%.2f sec/step)',
                         global_step_count, accuracy_value, time_elapsed)

            return accuracy_value

        # Define some scalar quantities to monitor
        tf.summary.scalar("Validation_Accuracy", accuracy)
        tf.summary.scalar("Validation_losses/Total_Loss", total_loss)
        my_summary_op = tf.summary.merge_all()

        # Get your supervisor
        sv = tf.train.Supervisor(
            logdir=confs["LogDirEval"], summary_op=None, init_fn=restore_fn)

        # Now we are ready to run in one session
        with sv.managed_session() as sess:
            for step in range(int(num_batches_per_epoch * confs["EpochsTest"])):
                # print vital information every start of the epoch as always
                if step % num_batches_per_epoch == 0:
                    logging.info('Epoch: %s/%s',
                                 step // num_batches_per_epoch + 1,
                                 confs["EpochsTest"])
                    logging.info('Current Streaming Accuracy: %.4f',
                                 sess.run(accuracy))

                # Every step evaluates one batch
                eval_step(sess, metrics_op=metrics_op,
                          global_step=sv.global_step)

                # Compute summaries every 10 steps and continue evaluating
                if step % 10 == 0:
                    summaries = sess.run(my_summary_op)
                    sv.summary_computed(sess, summaries)

            # At the end of all the evaluation, show the final accuracy
            logging.info('Final Streaming Accuracy: %.4f', sess.run(accuracy))

            # Now we want to visualize the last batch's images just to see what our model has predicted.
            # The fetched arrays get their own names so the graph tensors are not overwritten.
            raw_images_value, labels_value, predictions_value, probabilities_value = sess.run(
                [raw_images, labels, predictions, probabilities])

            # Show at most 10 images, fewer when the batch is smaller
            for i in range(min(10, len(raw_images_value))):
                image = raw_images_value[i]
                label = labels_value[i]
                prediction = predictions_value[i]
                probability = probabilities_value[i]

                prediction_name = dataset.labels_to_name[prediction]
                label_name = dataset.labels_to_name[label]
                text = 'Prediction: %s \n Ground Truth: %s \n Probability: %s' % (
                    prediction_name, label_name, probability[prediction])
                img_plot = plt.imshow(image)

                # Displaying the plot is currently disabled. To enable it,
                # set the title and hide the axes, then show the figure:
                # plt.title(text)
                # img_plot.axes.get_yaxis().set_ticks([])
                # img_plot.axes.get_xaxis().set_ticks([])
                # plt.show()

            logging.info(
                'Model evaluation has completed! Visit TensorBoard for more information regarding your evaluation.')
            sv.saver.save(sess, sv.save_path, global_step=sv.global_step)