Example #1
# Assumed module-level imports (not shown in the original snippets);
# data_process and concordance_cc2 are project helpers (a sketch of
# concordance_cc2 is given after this example).
import numpy as np
import tensorflow as tf

import data_process

slim = tf.contrib.slim
FLAGS = tf.app.flags.FLAGS


def evaluate():
    g = tf.Graph()
    with g.as_default():

        image_list, label_list = data_process.read_labeled_image_list(
            FLAGS.input_file)
        # Split into sequences. For the CNN models this splits the data into
        # batches of length seq_length; for the CNN-RNN models there is no
        # check that the images in a sequence are consecutive, come from the
        # same video, or show the same person.
        image_list, label_list = data_process.make_rnn_input_per_seq_length_size(
            image_list, label_list, FLAGS.seq_length)

        images = tf.convert_to_tensor(image_list)
        labels = tf.convert_to_tensor(label_list)

        # Make an input queue. `images` is passed twice so the queue also
        # carries the image file paths, which decodeRGB returns as the
        # image locations.
        input_queue = tf.train.slice_input_producer([images, labels, images],
                                                    num_epochs=None,
                                                    shuffle=False,
                                                    seed=None,
                                                    capacity=1000,
                                                    shared_name=None,
                                                    name=None)
        images_batch, labels_batch, image_locations_batch = data_process.decodeRGB(
            input_queue, FLAGS.seq_length, FLAGS.size)

        images_batch = tf.to_float(images_batch)
        images_batch -= 128.0
        images_batch /= 128.0  # scale all pixel values to the range [-1, 1]

        images_batch = tf.reshape(images_batch, [-1, 96, 96, 3])  # assumes FLAGS.size == 96
        labels_batch = tf.reshape(labels_batch, [-1, 2])

        if FLAGS.network == 'vggface_4096':
            from vggface import vggface_4096x4096x2 as net
            network = net.VGGFace(FLAGS.batch_size * FLAGS.seq_length)
            network.setup(images_batch)
            prediction = network.get_output()

        elif FLAGS.network == 'vggface_2000':
            from vggface import vggface_4096x2000x2 as net
            network = net.VGGFace(FLAGS.batch_size * FLAGS.seq_length)
            network.setup(images_batch)
            prediction = network.get_output()

        elif FLAGS.network == 'affwildnet_resnet':
            from tensorflow.contrib.slim.python.slim.nets import resnet_v1
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net, _ = resnet_v1.resnet_v1_50(inputs=images_batch,
                                                is_training=False,
                                                num_classes=None)

                with tf.variable_scope('rnn') as scope:
                    cnn = tf.reshape(
                        net, [FLAGS.batch_size, FLAGS.seq_length, -1])
                    cell = tf.nn.rnn_cell.MultiRNNCell(
                        [tf.nn.rnn_cell.GRUCell(128) for _ in range(2)])
                    outputs, _ = tf.nn.dynamic_rnn(cell, cnn, dtype=tf.float32)
                    outputs = tf.reshape(
                        outputs,
                        (FLAGS.batch_size * FLAGS.seq_length, 128))

                    weights_initializer = tf.truncated_normal_initializer(
                        stddev=0.01)
                    weights = tf.get_variable('weights_output',
                                              shape=[128, 2],
                                              initializer=weights_initializer,
                                              trainable=True)
                    biases = tf.get_variable('biases_output',
                                             shape=[2],
                                             initializer=tf.zeros_initializer,
                                             trainable=True)

                    prediction = tf.nn.xw_plus_b(outputs, weights, biases)

        elif FLAGS.network == 'affwildnet_vggface':
            from affwildnet import vggface_gru as net
            network = net.VGGFace(FLAGS.batch_size, FLAGS.seq_length)
            network.setup(images_batch)
            prediction = network.get_output()

        num_batches = int(len(image_list) / FLAGS.batch_size)

        variables_to_restore = tf.global_variables()

        with tf.Session() as sess:

            init_fn = slim.assign_from_checkpoint_fn(
                FLAGS.pretrained_model_checkpoint_path,
                variables_to_restore,
                ignore_missing_vars=False)

            init_fn(sess)
            print('Loading model {}'.format(
                FLAGS.pretrained_model_checkpoint_path))

            # Create the coordinator before starting the queue runners so the
            # runner threads can be stopped and joined cleanly.
            coord = tf.train.Coordinator()
            tf.train.start_queue_runners(sess=sess, coord=coord)

            evaluated_predictions = []
            evaluated_labels = []
            images = []

            try:
                for _ in range(num_batches):

                    pr, l, imm = sess.run(
                        [prediction, labels_batch, image_locations_batch])
                    evaluated_predictions.append(pr)
                    evaluated_labels.append(l)
                    images.append(imm)

                    if coord.should_stop():
                        break
                coord.request_stop()
            except Exception as e:
                coord.request_stop(e)

            predictions = np.reshape(evaluated_predictions, (-1, 2))
            labels = np.reshape(evaluated_labels, (-1, 2))
            images = np.reshape(images, (-1))

            conc_arousal = concordance_cc2(predictions[:, 1], labels[:, 1])
            conc_valence = concordance_cc2(predictions[:, 0], labels[:, 0])

            for i in range(len(predictions)):
                print("Labels: ", labels[i], "Predictions: ", predictions[i],
                      "Error: ", (abs(labels[i] - predictions[i])))
            print(
                "------------------------------------------------------------------------------"
            )
            print('Concordance on valence : {}'.format(conc_valence))
            print('Concordance on arousal : {}'.format(conc_arousal))
            print('Concordance on total : {}'.format(
                (conc_arousal + conc_valence) / 2))

            mse_arousal = np.mean((predictions[:, 1] - labels[:, 1])**2)
            print('MSE Arousal : {}'.format(mse_arousal))
            mse_valence = np.mean((predictions[:, 0] - labels[:, 0])**2)
            print('MSE Valence : {}'.format(mse_valence))

        return conc_valence, conc_arousal, (
            conc_arousal + conc_valence) / 2, mse_arousal, mse_valence
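
Both evaluation examples call a concordance_cc2 helper that is not shown. The Concordance Correlation Coefficient (CCC) between predictions x and labels y is 2*cov(x, y) / (var(x) + var(y) + (mean(x) - mean(y))^2). A minimal NumPy sketch, assuming the helper simply returns the CCC of two 1-D arrays:

def concordance_cc2(predictions, labels):
    # CCC = 2*cov(x, y) / (var(x) + var(y) + (mean(x) - mean(y))^2)
    pred_mean, labels_mean = np.mean(predictions), np.mean(labels)
    covariance = np.mean((predictions - pred_mean) * (labels - labels_mean))
    return (2 * covariance /
            (np.var(predictions) + np.var(labels) +
             (pred_mean - labels_mean)**2))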
Example #2
# Assumes the same module-level imports as Example #1, plus the project's
# AffWildNet model definitions and TensorFlow's logging module.
def train():
    g = tf.Graph()
    with g.as_default():

        image_list, label_list = data_process.read_labeled_image_list(
            FLAGS.input_file)
        # split into sequences
        image_list, label_list = data_process.make_rnn_input_per_seq_length_size(
            image_list, label_list, FLAGS.seq_length)

        images = tf.convert_to_tensor(image_list)
        labels = tf.convert_to_tensor(label_list)

        # Makes an input queue
        input_queue = tf.train.slice_input_producer([images, labels, images],
                                                    num_epochs=None,
                                                    shuffle=True,
                                                    seed=None,
                                                    capacity=1000,
                                                    shared_name=None,
                                                    name=None)
        images_sequence, labels_sequence, image_locations_sequence = data_process.decodeRGB(
            input_queue, FLAGS.seq_length, FLAGS.size)
        images_sequence = tf.to_float(images_sequence)
        images_sequence -= 128.0
        images_sequence /= 128.0  # scale all pixel values to the range [-1, 1]

        images_batch, labels_batch, image_locations_batch = tf.train.shuffle_batch(
            [images_sequence, labels_sequence, image_locations_sequence],
            batch_size=FLAGS.batch_size,
            min_after_dequeue=100,
            num_threads=1,
            capacity=1000)

        images_batch = tf.reshape(images_batch, [-1, 96, 96, 3])

        labels_batch = tf.reshape(labels_batch,
                                  [FLAGS.batch_size, FLAGS.seq_length, 2])

        if FLAGS.network == 'CNN_GRU_1RNN':
            network = AffWildNet.CNN_GRU_1RNN(FLAGS.seq_length,
                                              FLAGS.batch_size, FLAGS.h_units)
        elif FLAGS.network == 'CNN_GRU_3RNN':
            network = AffWildNet.CNN_GRU_3RNN(FLAGS.seq_length,
                                              FLAGS.batch_size, FLAGS.h_units)

        network.setup(images_batch)
        prediction = network.get_output()

        prediction = tf.reshape(prediction,
                                [FLAGS.batch_size, FLAGS.seq_length, 2])
        for i, name in enumerate(['valence', 'arousal']):
            preds = []
            labs = []
            for j in range(FLAGS.batch_size):
                pred_single = tf.reshape(prediction[j, :, i], (-1, ))
                gt_single = tf.reshape(labels_batch[j, :, i], (-1, ))
                preds.append(tf.reduce_mean(pred_single))
                labs.append(tf.reduce_mean(gt_single))
            preds = tf.convert_to_tensor(preds)
            labs = tf.convert_to_tensor(labs)
            if FLAGS.concordance_loss:
                # Assumed: concordance_cc2 returns a loss here (e.g. 1 - CCC),
                # since minimizing the raw CCC would reduce agreement.
                loss = concordance_cc2(preds, labs)
            else:
                loss = tf.reduce_mean(tf.square(preds - labs))
            # Valence and arousal each contribute half of the total loss.
            slim.losses.add_loss(loss / 2.)

        total_loss = slim.losses.get_total_loss()
        optimizer = tf.train.AdamOptimizer(FLAGS.initial_learning_rate)

        ## To restore only a subset of the weights/biases, replace
        ## tf.global_variables() with that subset.
        variables_to_restore = tf.global_variables()

    with tf.Session(graph=g) as sess:
        if FLAGS.pretrained_model_checkpoint_path:
            init_fn = slim.assign_from_checkpoint_fn(
                FLAGS.pretrained_model_checkpoint_path,
                variables_to_restore,
                ignore_missing_vars=True)
        else:
            init_fn = None

        ## variables_to_train includes all weights and biases; to train only a
        ## subset, change this list accordingly.
        train_op = slim.learning.create_train_op(
            total_loss,
            optimizer,
            variables_to_train=tf.global_variables(),
            summarize_gradients=True)
        logging.set_verbosity(1)

        slim.learning.train(train_op,
                            FLAGS.train_dir,
                            init_fn=init_fn,
                            save_summaries_secs=600 * 360,
                            log_every_n_steps=500,
                            save_interval_secs=60 * 15)
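
The training example minimizes the value returned by concordance_cc2, so that helper is assumed to return a loss rather than the raw CCC. A minimal TF1-style sketch of such a loss (1 - CCC; a hypothetical stand-in for the repository's definition):

def concordance_cc2_loss(predictions, labels):
    # Hypothetical loss variant: minimizing 1 - CCC maximizes agreement.
    pred_mean, pred_var = tf.nn.moments(predictions, axes=[0])
    labels_mean, labels_var = tf.nn.moments(labels, axes=[0])
    covariance = tf.reduce_mean(
        (predictions - pred_mean) * (labels - labels_mean))
    ccc = 2. * covariance / (
        pred_var + labels_var + tf.square(pred_mean - labels_mean))
    return 1. - ccc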
Example #3
def evaluate():
    g = tf.Graph()
    with g.as_default():

        image_list, label_list = data_process.read_labeled_image_list(
            FLAGS.input_file)
        # split into sequences
        image_list, label_list = data_process.make_rnn_input_per_seq_length_size(
            image_list, label_list, FLAGS.seq_length)

        images = tf.convert_to_tensor(image_list)
        labels = tf.convert_to_tensor(label_list)

        # Makes an input queue
        input_queue = tf.train.slice_input_producer([images, labels, images],
                                                    num_epochs=None,
                                                    shuffle=False,
                                                    seed=None,
                                                    capacity=1000,
                                                    shared_name=None,
                                                    name=None)
        images_batch, labels_batch, image_locations_batch = data_process.decodeRGB(
            input_queue, FLAGS.seq_length, FLAGS.size)
        images_batch = tf.to_float(images_batch)
        images_batch -= 128.0
        images_batch /= 128.0  # scale all pixel values to the range [-1, 1]

        images_batch = tf.reshape(images_batch, [-1, 96, 96, 3])
        labels_batch = tf.reshape(labels_batch, [-1, 2])

        if FLAGS.network == 'CNN_GRU_1RNN':
            network = AffWildNet.CNN_GRU_1RNN(FLAGS.seq_length,
                                              FLAGS.batch_size, FLAGS.h_units)
        elif FLAGS.network == 'CNN_GRU_3RNN':
            network = AffWildNet.CNN_GRU_3RNN(FLAGS.seq_length,
                                              FLAGS.batch_size, FLAGS.h_units)

        network.setup(images_batch)
        prediction = network.get_output()

        num_batches = int(len(image_list) / FLAGS.batch_size)

        variables_to_restore = tf.global_variables()

        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.5

        with tf.Session(config=config) as sess:

            init_fn = slim.assign_from_checkpoint_fn(
                FLAGS.pretrained_model_checkpoint_path,
                variables_to_restore,
                ignore_missing_vars=False)

            init_fn(sess)
            print('Loading model {}'.format(
                FLAGS.pretrained_model_checkpoint_path))

            coord = tf.train.Coordinator()
            tf.train.start_queue_runners(sess=sess, coord=coord)

            evaluated_predictions = []
            evaluated_labels = []
            images = []

            try:
                for _ in range(num_batches):

                    pr, l, imm = sess.run(
                        [prediction, labels_batch, image_locations_batch])
                    evaluated_predictions.append(pr)
                    evaluated_labels.append(l)
                    images.append(imm)

                    if coord.should_stop():
                        break
                coord.request_stop()
            except Exception as e:
                coord.request_stop(e)

            predictions = np.reshape(evaluated_predictions, (-1, 2))
            labels = np.reshape(evaluated_labels, (-1, 2))
            images = np.reshape(images, (-1))

            conc_arousal = concordance_cc2(predictions[:, 1], labels[:, 1])
            conc_valence = concordance_cc2(predictions[:, 0], labels[:, 0])

            print('Concordance on valence : {}'.format(conc_valence))
            print('Concordance on arousal : {}'.format(conc_arousal))
            print('Concordance on total : {}'.format(
                (conc_arousal + conc_valence) / 2))

            mse_arousal = np.mean((predictions[:, 1] - labels[:, 1])**2)
            print('MSE Arousal : {}'.format(mse_arousal))
            mse_valence = np.mean((predictions[:, 0] - labels[:, 0])**2)
            print('MSE Valence : {}'.format(mse_valence))

        return conc_valence, conc_arousal, (
            conc_arousal + conc_valence) / 2, mse_arousal, mse_valence
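
Every example also calls data_process.decodeRGB, which is not shown. A plausible sketch, assuming each queue element carries seq_length image paths together with their labels and locations (the project's actual helper may differ):

def decodeRGB(input_queue, seq_length, size):
    # Decode each of the seq_length image paths in a sequence into a
    # [size, size, 3] float tensor, and stack them along a new axis.
    file_paths, labels, locations = input_queue
    images = []
    for i in range(seq_length):
        file_content = tf.read_file(file_paths[i])
        image = tf.image.decode_jpeg(file_content, channels=3)
        image = tf.image.resize_images(image, [size, size])
        images.append(image)
    return tf.stack(images), labels, locations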
Example #4
def predict(image_path):
    g = tf.Graph()
    with g.as_default():

        # Overwrite the input file with the path of the image to predict,
        # using dummy valence/arousal labels (0, 0).
        input_file = FLAGS.input_file
        with open(input_file, 'w') as f:
            f.write(image_path + ',0,0')

        # Read the input data.
        image_list, label_list = data_process.read_labeled_image_list(
            input_file)
        # Split into sequences. For the CNN models this splits the data into
        # batches of length seq_length; for the CNN-RNN models there is no
        # check that the images in a sequence are consecutive, come from the
        # same video, or show the same person.
        image_list, label_list = data_process.make_rnn_input_per_seq_length_size(
            image_list, label_list, FLAGS.seq_length)

        images = tf.convert_to_tensor(image_list)
        labels = tf.convert_to_tensor(label_list)

        # Makes an input queue
        input_queue = tf.train.slice_input_producer([images, labels, images],
                                                    num_epochs=None,
                                                    shuffle=False,
                                                    seed=None,
                                                    capacity=1000,
                                                    shared_name=None,
                                                    name=None)
        images_batch, labels_batch, image_locations_batch = data_process.decodeRGB(
            input_queue, FLAGS.seq_length, FLAGS.size)
        images_batch = tf.to_float(images_batch)
        images_batch -= 128.0
        images_batch /= 128.0  # scale all pixel values to the range [-1, 1]

        images_batch = tf.reshape(images_batch, [-1, 96, 96, 3])
        labels_batch = tf.reshape(labels_batch, [-1, 2])

        if FLAGS.network == 'affwildnet_vggface':
            from affwildnet import vggface_gru as net
            network = net.VGGFace(FLAGS.batch_size, FLAGS.seq_length)
            network.setup(images_batch)
            prediction = network.get_output()
        else:
            raise ValueError(
                'This predict() example only supports affwildnet_vggface.')

        num_batches = int(len(image_list) / FLAGS.batch_size)
        variables_to_restore = tf.global_variables()

        with tf.Session() as sess:
            init_fn = slim.assign_from_checkpoint_fn(
                FLAGS.pretrained_model_checkpoint_path,
                variables_to_restore,
                ignore_missing_vars=False)
            init_fn(sess)
            print('Loading model {}'.format(
                FLAGS.pretrained_model_checkpoint_path))
            coord = tf.train.Coordinator()
            tf.train.start_queue_runners(sess=sess, coord=coord)
            evaluated_predictions = []
            evaluated_labels = []
            images = []

            try:
                for _ in range(num_batches):
                    pr, l, imm = sess.run(
                        [prediction, labels_batch, image_locations_batch])
                    evaluated_predictions.append(pr)
                    evaluated_labels.append(l)
                    images.append(imm)
                    if coord.should_stop():
                        break
                coord.request_stop()
            except Exception as e:
                coord.request_stop(e)

            predictions = np.reshape(evaluated_predictions, (-1, 2))
            labels = np.reshape(evaluated_labels, (-1, 2))
            images = np.reshape(images, (-1))

            valence = np.mean(predictions[:, 0])
            print('Valence : {}'.format(valence))
            arousal = np.mean(predictions[:, 1])
            print('Arousal : {}'.format(arousal))

    return valence, arousal
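
A minimal way to drive this example, assuming the relevant flags (input_file, seq_length, size, batch_size, network, pretrained_model_checkpoint_path) are defined elsewhere; the image path is a placeholder:

if __name__ == '__main__':
    # Hypothetical invocation on a single cropped face image.
    valence, arousal = predict('/path/to/face_crop.jpg')
    print('valence={:.3f}, arousal={:.3f}'.format(valence, arousal))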