Example #1
def evaluate():
  """Eval CIFAR-10 for a number of steps."""
  with tf.Graph().as_default() as g:
    # Get images and labels for CIFAR-10.
    eval_data = FLAGS.eval_data == 'test'

    images, labels = drd.inputs(eval_data=eval_data)

    # Build a Graph that computes the logits predictions from the
    # inference model.
    #logits = drd.inference(images, FLAGS.n_residual_blocks)
    logits = drd.inference_alex_net(images)
    # Calculate predictions.
    top_k_op = tf.nn.in_top_k(logits, labels, 1)
    pred = tf.cast(tf.argmax(logits, axis=1), tf.int32)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        drd.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.summary.merge_all()

    summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)

    while True:
      eval_once(saver, summary_writer, top_k_op, summary_op, logits, images, labels, pred)
      if FLAGS.run_once:
        break
      time.sleep(FLAGS.eval_interval_secs)
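
The eval_once helper is defined elsewhere in the repository. For orientation only, here is a minimal sketch of the kind of single evaluation pass it is expected to perform, modeled on the standard TensorFlow CIFAR-10 tutorial; the reduced argument list and FLAGS.checkpoint_dir / FLAGS.num_examples are assumptions, not the repository's actual signature.

import math
import numpy as np
import tensorflow as tf

def eval_once_sketch(saver, summary_writer, top_k_op, summary_op):
  """Hypothetical single evaluation pass (CIFAR-10 tutorial style)."""
  with tf.Session() as sess:
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    if not (ckpt and ckpt.model_checkpoint_path):
      print('No checkpoint file found')
      return
    # Restore the moving-average variables and recover the global step
    # from a path of the form '.../model.ckpt-<step>'.
    saver.restore(sess, ckpt.model_checkpoint_path)
    global_step = int(ckpt.model_checkpoint_path.split('-')[-1])

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
      num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size))
      true_count = 0
      total_sample_count = num_iter * FLAGS.batch_size
      for _ in range(num_iter):
        if coord.should_stop():
          break
        true_count += np.sum(sess.run(top_k_op))
      precision = float(true_count) / total_sample_count
      print('precision @ 1 = %.3f' % precision)
      summary_writer.add_summary(sess.run(summary_op), global_step)
    finally:
      coord.request_stop()
      coord.join(threads, stop_grace_period_secs=10)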
Example #2
def evaluate():
    """Eval CIFAR-10 for a number of steps."""
    with tf.Graph().as_default() as g:
        # Get images and labels for CIFAR-10.
        eval_data = FLAGS.eval_data == 'test'

        images, labels = drd.inputs(eval_data=eval_data)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        #logits = drd.inference(images, FLAGS.n_residual_blocks)
        logits, target_conv = drd.oxford_net_C(images)
        loss = drd.loss(logits, labels)
        # Calculate predictions.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)
        pred = tf.cast(tf.argmax(logits, axis=1), tf.int32)
        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            drd.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)
        ########### Here the saliency map calculation begins ############
        saliency = saliency_map(logits, images)

        # Logit of the ground-truth class for each example; labels are integer
        # class indices, so convert them to one-hot before multiplying.
        y_c = tf.reduce_sum(
            logits * tf.one_hot(labels, tf.shape(logits)[-1]), axis=1)

        target_conv_layer_grad = tf.gradients(y_c, target_conv)[0]
        print('target_conv_layer_grad:', target_conv_layer_grad)
        # Guided backpropagation back to the input layer
        gb_grad = tf.gradients(loss, images)[0]
        print('gb_grad:', gb_grad)

        while True:
            eval_once(saver, top_k_op, logits, images, labels, pred, saliency,
                      target_conv, target_conv_layer_grad, gb_grad)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
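
saliency_map is another repository-local helper that is not shown here. Assuming it computes plain vanilla-gradient saliency (gradient of the top logit with respect to the input image), a minimal sketch could look like the following; the repository's actual implementation may well differ.

import tensorflow as tf

def saliency_map_sketch(logits, images):
    """Hypothetical vanilla-gradient saliency: d(max logit)/d(input)."""
    top_logit = tf.reduce_max(logits, axis=1)
    grads = tf.gradients(top_logit, images)[0]
    # Collapse the channel axis so each pixel gets a single saliency value.
    return tf.reduce_max(tf.abs(grads), axis=-1)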
def train():
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # Get images and labels for CIFAR-10.
        images, labels, names = drd.distorted_inputs()
        #get validation data
        val_images, val_labels = drd.inputs(True)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        #logits1= drd.inference(images, FLAGS.n_residual_blocks)
        logits = drd.resnet_v1_50(images)
        val_logits = drd.resnet_v1_50(val_images)

        # calculate predictions
        predictions = tf.cast(tf.argmax(logits, axis=1), tf.int32)
        val_predictions = tf.cast(tf.argmax(val_logits, axis=1), tf.int32)

        # ops for batch accuracy calculation
        correct_prediction = tf.equal(predictions, labels)
        val_correct_prediction = tf.equal(val_predictions, val_labels)

        batch_accuracy = tf.reduce_mean(tf.cast(correct_prediction,
                                                tf.float32))
        val_batch_accuracy = tf.reduce_mean(
            tf.cast(val_correct_prediction, tf.float32))

        # calculate training accuracy
        # Calculate loss.
        loss = drd.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = drd.train(loss, global_step)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        variables = slim.get_variables_to_restore()
        # keep only the weight variables for the pre-trained saver
        variables_to_restore = [
            v for v in variables if v.name.split('/')[-1] == 'weights:0'
        ]
        # Add ops to save and restore all the variables.
        saver_pre = tf.train.Saver(
            variables_to_restore[0:-2])  # exclude logits layer
        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # # Build an initialization operation to run below.
        init = tf.global_variables_initializer()
        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        # sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(FLAGS.save_dir, sess.graph)

        step_start = 0
        try:
            #### Try to restore the last checkpoint of the full model, if one exists ####
            print("Trying to restore last checkpoint ...")
            save_dir = FLAGS.save_dir
            # Use TensorFlow to find the latest checkpoint - if any.
            last_chk_path = tf.train.latest_checkpoint(checkpoint_dir=save_dir)
            # Try and load the data in the checkpoint.
            saver.restore(sess, save_path=last_chk_path)

            # If we get to this point, the checkpoint was successfully loaded.
            print("Restored checkpoint from:", last_chk_path)
            # Recover the step from the restored path (checkpoints are saved
            # as 'model.ckpt-<step>') so training resumes from there.
            step_start = int(last_chk_path.split('-')[-1])

        except Exception:
            # If all the above failed for some reason, simply
            # initialize all the variables for the TensorFlow graph.
            print(
                "Failed to restore any checkpoints. Initializing variables instead."
            )
            sess.run(init)

        accuracy_dev = []
        val_accuracy_dev = []
        for step in range(step_start, FLAGS.max_steps):
            start_time = time.time()
            _, loss_value, accuracy = sess.run(
                [train_op, loss, batch_accuracy])
            #im, lab,log = sess.run([images,labels,logits])
            #append the next accuracy to the development list
            accuracy_dev.append(accuracy)
            #print([lab,log])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:

                im_id, val_acc = sess.run([names, val_batch_accuracy])

                val_accuracy_dev.append(val_acc)
                print("the image being trained on is {}".format(im_id))
                print("The average validation accuracy is: {}".format(
                    np.mean(val_accuracy_dev)))
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = (
                    '%s: step %d, loss = %.2f, avg_batch_accuracy = %.2f, (%.1f examples/sec; %.3f '
                    'sec/batch)')
                # take averages of all the accuracies from the previous batches
                print(format_str %
                      (datetime.now(), step, loss_value, np.mean(accuracy_dev),
                       examples_per_sec, sec_per_batch))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 100 == 0 or (step + 1) == FLAGS.max_steps:
                #set paths and saving ops for the full and sub_network
                checkpoint_path = os.path.join(FLAGS.save_dir, 'model.ckpt')
                #pre_trained_path = os.path.join(FLAGS.pre_trained_dir, 'model.ckpt')

                saver.save(sess, checkpoint_path, global_step=step)
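
saver_pre is constructed above but never used in this snippet; presumably it is meant to restore pre-trained backbone weights (excluding the logits layer) before fine-tuning. A hedged sketch of how it could be applied, assuming FLAGS.pre_trained_dir points at the pre-trained checkpoint (as hinted by the commented-out pre_trained_path):

# Hypothetical usage, e.g. in the fallback branch after sess.run(init):
pre_trained_ckpt = tf.train.latest_checkpoint(FLAGS.pre_trained_dir)
if pre_trained_ckpt is not None:
    saver_pre.restore(sess, pre_trained_ckpt)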
def train():
    #for which data set to use
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # Get images and labels
        images, labels = drd.distorted_inputs()
        #get validation data
        val_images, val_labels = drd.inputs(False)
        # sanity-check the input shapes
        print(images.get_shape(), val_images.get_shape())

        #logits1= drd.inference(images, FLAGS.n_residual_blocks)
        logits = model_2.inference(images,
                                   n=4,
                                   reuse=tf.AUTO_REUSE,
                                   is_training=True)
        val_logits = model_2.inference(val_images,
                                       n=4,
                                       reuse=tf.AUTO_REUSE,
                                       is_training=False)
        #logits = drd.resnet_v1_50(images, training=True)
        #val_logits = drd.resnet_v1_50(val_images, training = False)

        #softmx logits
        soft_max_logits = tf.nn.softmax(logits)
        soft_max_logits_val = tf.nn.softmax(val_logits)
        # calculate predictions
        predictions = tf.cast(tf.argmax(soft_max_logits, axis=1), tf.int32)
        val_predictions = tf.cast(tf.argmax(soft_max_logits_val, axis=1),
                                  tf.int32)

        # ops for batch accuracy calculation
        correct_prediction = tf.equal(predictions, labels)
        val_correct_prediction = tf.equal(val_predictions, val_labels)

        batch_accuracy = tf.reduce_mean(tf.cast(correct_prediction,
                                                tf.float32))
        val_batch_accuracy = tf.reduce_mean(
            tf.cast(val_correct_prediction, tf.float32))

        # Calculate loss, which includes softmax cross entropy and L2 regularization.
        cross_entropy = tf.losses.sparse_softmax_cross_entropy(logits=logits,
                                                               labels=labels)

        # Create a tensor named cross_entropy for logging purposes.
        tf.identity(cross_entropy, name='cross_entropy')
        tf.summary.scalar('cross_entropy', cross_entropy)

        # If no loss_filter_fn is passed, assume we want the default behavior,
        # which is that batch_normalization variables are excluded from loss.
        def exclude_batch_norm(name):
            return 'batch_normalization' not in name

        loss_filter_fn = exclude_batch_norm

        # Add weight decay to the loss (weight_decay is assumed to be defined
        # elsewhere in the module, e.g. as a constant or a FLAG).
        l2_loss = weight_decay * tf.add_n(
            # loss is computed using fp32 for numerical stability.
            [
                tf.nn.l2_loss(tf.cast(v, tf.float32))
                for v in tf.trainable_variables() if loss_filter_fn(v.name)
            ])
        tf.summary.scalar('l2_loss', l2_loss)
        loss = cross_entropy + l2_loss
        global_step = tf.train.get_or_create_global_step()

        # Decay the learning rate at fixed epoch boundaries (30/60/90/120)
        # via an in-graph schedule driven by global_step; a plain Python float
        # would be frozen into the optimizer at graph-construction time.
        lr_decay_factors = [1, 0.1, 0.01, 0.001, 0.0001]
        steps_per_epoch = NUM_IMAGES // FLAGS.batch_size
        boundaries = [epoch * steps_per_epoch for epoch in (30, 60, 90, 120)]
        learning_rate = tf.train.piecewise_constant(
            global_step, boundaries, [0.001 * f for f in lr_decay_factors])
        # Create a tensor named learning_rate for logging purposes
        tf.identity(learning_rate, name='learning_rate')
        tf.summary.scalar('learning_rate', learning_rate)

        # momentum is assumed to be defined elsewhere in the module.
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=momentum)

        minimize_op = optimizer.minimize(loss, global_step)

        # Group the train step with the UPDATE_OPS collection so that
        # batch-normalization moving statistics are updated as well.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        train_op = tf.group(minimize_op, update_ops)
        # calculate training accuracy
        # Calculate loss.
        #loss = drd.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        #train_op = drd.train(loss, global_step)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        #variables = slim.get_variables_to_restore()
        #variables_to_restore = [v for v in variables if not v.name.split('/')[-1] != 'weights:0']
        # Add ops to save and restore all the variables.
        #saver_pre = tf.train.Saver(variables_to_restore[0:-2])  # exclude logits layer
        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # # Build an initialization operation to run below.
        init = tf.global_variables_initializer()
        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        # sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(FLAGS.save_dir, sess.graph)

        step_start = 0
        try:
            #### Try to restore the last checkpoint of the full model, if one exists ####
            print("Trying to restore last checkpoint ...")
            save_dir = FLAGS.save_dir
            # Use TensorFlow to find the latest checkpoint - if any.
            last_chk_path = tf.train.latest_checkpoint(checkpoint_dir=save_dir)
            # Try and load the data in the checkpoint.
            saver.restore(sess, save_path=last_chk_path)

            # If we get to this point, the checkpoint was successfully loaded.
            print("Restored checkpoint from:", last_chk_path)
            # get the step integer from restored path to start step from there
            uninitialized_vars = []
            for var in tf.global_variables():
                try:
                    sess.run(var)
                except tf.errors.FailedPreconditionError:
                    print("not init")
                    print(var)
                    uninitialized_vars.append(var)

            # create init op for the still uninitialized variables
            init_new_vars_op = tf.variables_initializer(uninitialized_vars)
            sess.run(init_new_vars_op)

        except Exception:
            # If all the above failed for some reason, simply
            # initialize all the variables for the TensorFlow graph.
            print(
                "Failed to restore any checkpoints. Initializing variables instead."
            )
            sess.run(init)

        accuracy_dev = []
        val_accuracy_dev = []
        step_start = 0
        for step in range(step_start, FLAGS.max_steps):
            start_time = time.time()
            #run train op
            _, loss_value, accuracy, gs = sess.run(
                [train_op, loss, batch_accuracy, global_step])

            # The learning-rate decay at epochs 30/60/90/120 is applied
            # in-graph by the tf.train.piecewise_constant schedule above,
            # so no Python-side reassignment of learning_rate is needed here.

            accuracy_dev.append(accuracy)
            duration = time.time() - start_time
            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                val_acc = sess.run(val_batch_accuracy)
                val_accuracy_dev.append(val_acc)

                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = (
                    '%s: step %d, loss = %.2f, avg_batch_accuracy = %.2f, (%.1f examples/sec; %.3f '
                    'sec/batch), validation accuracy %.2f')
                # take averages of all the accuracies from the previous batches
                print(format_str % (datetime.now(), step, loss_value,
                                    np.mean(accuracy_dev), examples_per_sec,
                                    sec_per_batch, np.mean(val_accuracy_dev)))

            if step % 10 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 100 == 0 or (step + 1) == FLAGS.max_steps:
                #set paths and saving ops for the full and sub_network
                checkpoint_path = os.path.join(FLAGS.save_dir, 'model.ckpt')
                #pre_trained_path = os.path.join(FLAGS.pre_trained_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
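
To see how the tf.train.piecewise_constant schedule used above behaves, a small standalone check (with made-up boundaries and values) evaluates it at a few global-step positions:

import tensorflow as tf

# Hypothetical boundaries/values, purely to illustrate the schedule shape.
gs = tf.placeholder(tf.int64, shape=[])
lr = tf.train.piecewise_constant(gs, [300, 600], [1e-3, 1e-4, 1e-5])
with tf.Session() as sess:
    for step in [0, 299, 300, 599, 600, 1000]:
        print(step, sess.run(lr, feed_dict={gs: step}))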
def train():
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False, name='global_step')

        # Get images and labels for CIFAR-10.
        images, labels, names = drd.distorted_inputs()
        # get validation data
        val_images, val_labels = drd.inputs(True)
        # Build a Graph that computes the logits predictions from the
        # inference model.
        #logits1= drd.inference(images, FLAGS.n_residual_blocks)
        logits = drd.shallow_oxford_net_C(images)
        val_logits = drd.shallow_oxford_net_C(val_images)
        # calculate predictions
        predictions = tf.cast(tf.argmax(logits, axis=1), tf.int32)
        val_predictions = tf.cast(tf.argmax(val_logits, axis=1), tf.int32)

        # ops for batch accuracy calculation
        correct_prediction = tf.equal(predictions, labels)
        val_correct_prediction = tf.equal(val_predictions, val_labels)

        batch_accuracy = tf.reduce_mean(tf.cast(correct_prediction,
                                                tf.float32))
        val_batch_accuracy = tf.reduce_mean(
            tf.cast(val_correct_prediction, tf.float32))

        tf.summary.scalar("Training Accuracy", batch_accuracy)

        # Calculate loss.
        loss = drd.loss(logits, labels)

        # updates the model parameters.
        train_op = drd.train(loss, global_step)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        sub_network = 'oxford_net'
        #saver_30 = tf.train.Saver(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=sub_network))

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        #Build an initialization operation to run below.
        init = tf.global_variables_initializer()
        #Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

        step_start = 0
        try:
            #### Try to restore the last checkpoint of the full model, if one exists ####
            print("Trying to restore last checkpoint ...")
            save_dir = FLAGS.save_dir
            # Use TensorFlow to find the latest checkpoint - if any.
            last_chk_path = tf.train.latest_checkpoint(checkpoint_dir=save_dir)
            # Try and load the data in the checkpoint.
            saver.restore(sess, save_path=last_chk_path)

            # If we get to this point, the checkpoint was successfully loaded.
            print("Restored checkpoint from:", last_chk_path)

            uninitialized_vars = []
            for var in tf.global_variables():
                try:
                    sess.run(var)
                except tf.errors.FailedPreconditionError:
                    uninitialized_vars.append(var)

            # create init op for the still uninitialized variables
            init_new_vars_op = tf.variables_initializer(uninitialized_vars)
            sess.run(init_new_vars_op)

            # Recover the step from the restored path (checkpoints are saved
            # as 'model.ckpt-<step>') so training resumes from there.
            step_start = int(last_chk_path.split('-')[-1])

        except Exception:
            # If all the above failed for some reason, simply
            # initialize all the variables for the TensorFlow graph.
            print(
                "Failed to restore any checkpoints. Initializing variables instead."
            )
            sess.run(init)
        names_iterated = []
        accuracy_dev = []
        val_accuracy_dev = []

        for step in range(step_start, FLAGS.max_steps):
            start_time = time.time()
            _, loss_value, accuracy, names_strings = sess.run(
                [train_op, loss, batch_accuracy, names])
            #append the next accuracy to the development list
            accuracy_dev.append(accuracy)
            names_iterated.append(names_strings)
            duration = time.time() - start_time
            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                im_id, val_acc = sess.run([names, val_batch_accuracy])
                val_accuracy_dev.append(val_acc)
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = (
                    '%s: step %d, loss = %.2f, avg_batch_accuracy = %.2f, (%.1f examples/sec; %.3f '
                    'sec/batch), validation accuracy %.2f, image_name: %s')
                # take averages of all the accuracies from the previous batches
                print(format_str %
                      (datetime.now(), step, loss_value, np.mean(accuracy_dev),
                       examples_per_sec, sec_per_batch,
                       np.mean(val_accuracy_dev), im_id))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 100 == 0 or (step + 1) == FLAGS.max_steps:
                #set paths and saving ops for the full and sub_network
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                pre_trained_path = os.path.join(FLAGS.pre_trained_dir,
                                                'model.ckpt')

                saver.save(sess, checkpoint_path, global_step=step)
                #saver_30.save(sess, pre_trained_path, global_step=step)

                #write files to disk to verify input pipeline is correct
                with open("file_names_" + str(step), "w") as f:
                    f.write("\n".join(str(x) for x in names_iterated))
                names_iterated = []