# Assumed top-level imports for these snippets (TF 1.x). The repo-local modules
# (data_process, AffWildNet, concordance_cc2) and the FLAGS definitions come
# from the rest of the repository and are not shown here.
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim


def evaluate():
    g = tf.Graph()
    with g.as_default():
        image_list, label_list = data_process.read_labeled_image_list(
            FLAGS.input_file)

        # Split into sequences. Note: for the CNN models this splits into
        # batches of length seq_length; for the CNN-RNN models there is no
        # check that the images in a sequence are consecutive, come from the
        # same video, or show the same person.
        image_list, label_list = data_process.make_rnn_input_per_seq_length_size(
            image_list, label_list, FLAGS.seq_length)

        images = tf.convert_to_tensor(image_list)
        labels = tf.convert_to_tensor(label_list)

        # Makes an input queue
        input_queue = tf.train.slice_input_producer(
            [images, labels, images],
            num_epochs=None,
            shuffle=False,
            seed=None,
            capacity=1000,
            shared_name=None,
            name=None)

        images_batch, labels_batch, image_locations_batch = data_process.decodeRGB(
            input_queue, FLAGS.seq_length, FLAGS.size)

        images_batch = tf.to_float(images_batch)
        images_batch -= 128.0
        images_batch /= 128.0  # scale all pixel values to the range [-1, 1]
        images_batch = tf.reshape(images_batch, [-1, 96, 96, 3])
        labels_batch = tf.reshape(labels_batch, [-1, 2])

        if FLAGS.network == 'vggface_4096':
            from vggface import vggface_4096x4096x2 as net
            network = net.VGGFace(FLAGS.batch_size * FLAGS.seq_length)
            network.setup(images_batch)
            prediction = network.get_output()
        elif FLAGS.network == 'vggface_2000':
            from vggface import vggface_4096x2000x2 as net
            network = net.VGGFace(FLAGS.batch_size * FLAGS.seq_length)
            network.setup(images_batch)
            prediction = network.get_output()
        elif FLAGS.network == 'affwildnet_resnet':
            from tensorflow.contrib.slim.python.slim.nets import resnet_v1
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net, _ = resnet_v1.resnet_v1_50(inputs=images_batch,
                                                is_training=False,
                                                num_classes=None)
            with tf.variable_scope('rnn') as scope:
                cnn = tf.reshape(net,
                                 [FLAGS.batch_size, FLAGS.seq_length, -1])
                cell = tf.nn.rnn_cell.MultiRNNCell(
                    [tf.nn.rnn_cell.GRUCell(128) for _ in range(2)])
                outputs, _ = tf.nn.dynamic_rnn(cell, cnn, dtype=tf.float32)
                outputs = tf.reshape(
                    outputs, (FLAGS.batch_size * FLAGS.seq_length, 128))

                weights_initializer = tf.truncated_normal_initializer(
                    stddev=0.01)
                weights = tf.get_variable('weights_output',
                                          shape=[128, 2],
                                          initializer=weights_initializer,
                                          trainable=True)
                biases = tf.get_variable('biases_output',
                                         shape=[2],
                                         initializer=tf.zeros_initializer,
                                         trainable=True)
                prediction = tf.nn.xw_plus_b(outputs, weights, biases)
        elif FLAGS.network == 'affwildnet_vggface':
            from affwildnet import vggface_gru as net
            network = net.VGGFace(FLAGS.batch_size, FLAGS.seq_length)
            network.setup(images_batch)
            prediction = network.get_output()

        num_batches = int(len(image_list) / FLAGS.batch_size)

        variables_to_restore = tf.global_variables()

        with tf.Session() as sess:
            init_fn = slim.assign_from_checkpoint_fn(
                FLAGS.pretrained_model_checkpoint_path,
                variables_to_restore,
                ignore_missing_vars=False)
            init_fn(sess)
            print('Loading model {}'.format(
                FLAGS.pretrained_model_checkpoint_path))

            tf.train.start_queue_runners(sess=sess)
            coord = tf.train.Coordinator()

            evaluated_predictions = []
            evaluated_labels = []
            images = []

            try:
                for _ in range(num_batches):
                    pr, l, imm = sess.run(
                        [prediction, labels_batch, image_locations_batch])
                    evaluated_predictions.append(pr)
                    evaluated_labels.append(l)
                    images.append(imm)

                    if coord.should_stop():
                        break
                coord.request_stop()
            except Exception as e:
                coord.request_stop(e)

            predictions = np.reshape(evaluated_predictions, (-1, 2))
            labels = np.reshape(evaluated_labels, (-1, 2))
            images = np.reshape(images, (-1))

            conc_arousal = concordance_cc2(predictions[:, 1], labels[:, 1])
            conc_valence = concordance_cc2(predictions[:, 0], labels[:, 0])

            for i in range(len(predictions)):
                print("Labels: ", labels[i], "Predictions: ", predictions[i],
                      "Error: ", abs(labels[i] - predictions[i]))
            print(
                "------------------------------------------------------------------------------"
            )

            print('Concordance on valence : {}'.format(conc_valence))
            print('Concordance on arousal : {}'.format(conc_arousal))
            print('Concordance on total : {}'.format(
                (conc_arousal + conc_valence) / 2))

            mse_arousal = sum(
                (predictions[:, 1] - labels[:, 1])**2) / len(labels[:, 1])
            print('MSE Arousal : {}'.format(mse_arousal))
            mse_valence = sum(
                (predictions[:, 0] - labels[:, 0])**2) / len(labels[:, 0])
            print('MSE Valence : {}'.format(mse_valence))

        return conc_valence, conc_arousal, (
            conc_arousal + conc_valence) / 2, mse_arousal, mse_valence
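# concordance_cc2 is defined elsewhere in the repo. The sketch below is an
# illustrative NumPy implementation of the Concordance Correlation Coefficient
# (CCC) it is assumed to compute for evaluation:
#     CCC = 2 * cov(x, y) / (var(x) + var(y) + (mean(x) - mean(y))^2)
# The name concordance_cc2_sketch is hypothetical, not part of the repo.
def concordance_cc2_sketch(predictions, labels):
    pred_mean, lab_mean = np.mean(predictions), np.mean(labels)
    pred_var, lab_var = np.var(predictions), np.var(labels)
    covariance = np.mean((predictions - pred_mean) * (labels - lab_mean))
    return 2 * covariance / (pred_var + lab_var + (pred_mean - lab_mean)**2)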
def train():
    g = tf.Graph()
    with g.as_default():
        image_list, label_list = data_process.read_labeled_image_list(
            FLAGS.input_file)

        # split into sequences
        image_list, label_list = data_process.make_rnn_input_per_seq_length_size(
            image_list, label_list, FLAGS.seq_length)

        images = tf.convert_to_tensor(image_list)
        labels = tf.convert_to_tensor(label_list)

        # Makes an input queue
        input_queue = tf.train.slice_input_producer(
            [images, labels, images],
            num_epochs=None,
            shuffle=True,
            seed=None,
            capacity=1000,
            shared_name=None,
            name=None)

        images_sequence, labels_sequence, image_locations_sequence = data_process.decodeRGB(
            input_queue, FLAGS.seq_length, FLAGS.size)

        images_sequence = tf.to_float(images_sequence)
        images_sequence -= 128.0
        images_sequence /= 128.0  # scale all pixel values to the range [-1, 1]

        images_batch, labels_batch, image_locations_batch = tf.train.shuffle_batch(
            [images_sequence, labels_sequence, image_locations_sequence],
            batch_size=FLAGS.batch_size,
            min_after_dequeue=100,
            num_threads=1,
            capacity=1000)

        images_batch = tf.reshape(images_batch, [-1, 96, 96, 3])
        labels_batch = tf.reshape(labels_batch,
                                  [FLAGS.batch_size, FLAGS.seq_length, 2])

        if FLAGS.network == 'CNN_GRU_1RNN':
            network = AffWildNet.CNN_GRU_1RNN(FLAGS.seq_length,
                                              FLAGS.batch_size, FLAGS.h_units)
        elif FLAGS.network == 'CNN_GRU_3RNN':
            network = AffWildNet.CNN_GRU_3RNN(FLAGS.seq_length,
                                              FLAGS.batch_size, FLAGS.h_units)

        network.setup(images_batch)
        prediction = network.get_output()
        prediction = tf.reshape(prediction,
                                [FLAGS.batch_size, FLAGS.seq_length, 2])

        # one loss term per output dimension (valence, arousal),
        # each weighted by 1/2
        for i, name in enumerate(['valence', 'arousal']):
            preds = []
            labs = []
            for j in range(FLAGS.batch_size):
                pred_single = tf.reshape(prediction[j, :, i], (-1, ))
                gt_single = tf.reshape(labels_batch[j, :, i], (-1, ))
                preds.append(tf.reduce_mean(pred_single))
                labs.append(tf.reduce_mean(gt_single))

            preds = tf.convert_to_tensor(preds)
            labs = tf.convert_to_tensor(labs)

            if FLAGS.concordance_loss:
                loss = concordance_cc2(preds, labs)
            else:
                loss = tf.reduce_mean(tf.square(preds - labs))

            slim.losses.add_loss(loss / 2.)

        total_loss = slim.losses.get_total_loss()
        optimizer = tf.train.AdamOptimizer(FLAGS.initial_learning_rate)

        # If you want to restore only a subset of the weights/biases,
        # replace tf.global_variables() with that subset.
        variables_to_restore = tf.global_variables()

        with tf.Session(graph=g) as sess:
            if FLAGS.pretrained_model_checkpoint_path:
                init_fn = slim.assign_from_checkpoint_fn(
                    FLAGS.pretrained_model_checkpoint_path,
                    variables_to_restore,
                    ignore_missing_vars=True)
            else:
                init_fn = None

            # variables_to_train contains all weights and biases; if you want
            # to train only a subset, change it accordingly.
            train_op = slim.learning.create_train_op(
                total_loss,
                optimizer,
                variables_to_train=tf.global_variables(),
                summarize_gradients=True)

            # logging refers to TensorFlow's logging module, set up elsewhere
            logging.set_verbosity(1)
            slim.learning.train(train_op,
                                FLAGS.train_dir,
                                init_fn=init_fn,
                                save_summaries_secs=600 * 360,
                                log_every_n_steps=500,
                                save_interval_secs=60 * 15)
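# For the FLAGS.concordance_loss branch above, the training-side concordance_cc2
# is assumed to be a TensorFlow op that returns a value to *minimize*, i.e.
# 1 - CCC rather than the CCC itself (otherwise minimizing it would drive the
# agreement down). A minimal sketch under that assumption; the name
# concordance_loss_sketch is hypothetical, not part of the repo.
def concordance_loss_sketch(predictions, labels):
    pred_mean, pred_var = tf.nn.moments(predictions, axes=[0])
    lab_mean, lab_var = tf.nn.moments(labels, axes=[0])
    covariance = tf.reduce_mean((predictions - pred_mean) * (labels - lab_mean))
    ccc = 2. * covariance / (
        pred_var + lab_var + tf.square(pred_mean - lab_mean))
    return 1. - ccc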
def evaluate():
    g = tf.Graph()
    with g.as_default():
        image_list, label_list = data_process.read_labeled_image_list(
            FLAGS.input_file)

        # split into sequences
        image_list, label_list = data_process.make_rnn_input_per_seq_length_size(
            image_list, label_list, FLAGS.seq_length)

        images = tf.convert_to_tensor(image_list)
        labels = tf.convert_to_tensor(label_list)

        # Makes an input queue
        input_queue = tf.train.slice_input_producer(
            [images, labels, images],
            num_epochs=None,
            shuffle=False,
            seed=None,
            capacity=1000,
            shared_name=None,
            name=None)

        images_batch, labels_batch, image_locations_batch = data_process.decodeRGB(
            input_queue, FLAGS.seq_length, FLAGS.size)

        images_batch = tf.to_float(images_batch)
        images_batch -= 128.0
        images_batch /= 128.0  # scale all pixel values to the range [-1, 1]
        images_batch = tf.reshape(images_batch, [-1, 96, 96, 3])
        labels_batch = tf.reshape(labels_batch, [-1, 2])

        if FLAGS.network == 'CNN_GRU_1RNN':
            network = AffWildNet.CNN_GRU_1RNN(FLAGS.seq_length,
                                              FLAGS.batch_size, FLAGS.h_units)
        elif FLAGS.network == 'CNN_GRU_3RNN':
            network = AffWildNet.CNN_GRU_3RNN(FLAGS.seq_length,
                                              FLAGS.batch_size, FLAGS.h_units)

        network.setup(images_batch)
        prediction = network.get_output()

        num_batches = int(len(image_list) / FLAGS.batch_size)

        variables_to_restore = tf.global_variables()

        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.5

        # pass config so the GPU memory fraction set above takes effect
        with tf.Session(config=config) as sess:
            init_fn = slim.assign_from_checkpoint_fn(
                FLAGS.pretrained_model_checkpoint_path,
                variables_to_restore,
                ignore_missing_vars=False)
            init_fn(sess)
            print('Loading model {}'.format(
                FLAGS.pretrained_model_checkpoint_path))

            tf.train.start_queue_runners(sess=sess)
            coord = tf.train.Coordinator()

            evaluated_predictions = []
            evaluated_labels = []
            images = []

            try:
                for _ in range(num_batches):
                    pr, l, imm = sess.run(
                        [prediction, labels_batch, image_locations_batch])
                    evaluated_predictions.append(pr)
                    evaluated_labels.append(l)
                    images.append(imm)

                    if coord.should_stop():
                        break
                coord.request_stop()
            except Exception as e:
                coord.request_stop(e)

            predictions = np.reshape(evaluated_predictions, (-1, 2))
            labels = np.reshape(evaluated_labels, (-1, 2))
            images = np.reshape(images, (-1))

            conc_arousal = concordance_cc2(predictions[:, 1], labels[:, 1])
            conc_valence = concordance_cc2(predictions[:, 0], labels[:, 0])

            print('Concordance on valence : {}'.format(conc_valence))
            print('Concordance on arousal : {}'.format(conc_arousal))
            print('Concordance on total : {}'.format(
                (conc_arousal + conc_valence) / 2))

            mse_arousal = sum(
                (predictions[:, 1] - labels[:, 1])**2) / len(labels[:, 1])
            print('MSE Arousal : {}'.format(mse_arousal))
            mse_valence = sum(
                (predictions[:, 0] - labels[:, 0])**2) / len(labels[:, 0])
            print('MSE Valence : {}'.format(mse_valence))

        return conc_valence, conc_arousal, (
            conc_arousal + conc_valence) / 2, mse_arousal, mse_valence
def predict(image_path):
    g = tf.Graph()
    with g.as_default():
        # overwrite the input file with the path of the image to predict
        input_file = FLAGS.input_file
        with open(input_file, 'w') as f:
            f.write(image_path + ',0,0')

        # read input data
        image_list, label_list = data_process.read_labeled_image_list(
            input_file)

        # Split into sequences. Note: for the CNN models this splits into
        # batches of length seq_length; for the CNN-RNN models there is no
        # check that the images in a sequence are consecutive, come from the
        # same video, or show the same person.
        image_list, label_list = data_process.make_rnn_input_per_seq_length_size(
            image_list, label_list, FLAGS.seq_length)

        images = tf.convert_to_tensor(image_list)
        labels = tf.convert_to_tensor(label_list)

        # Makes an input queue
        input_queue = tf.train.slice_input_producer(
            [images, labels, images],
            num_epochs=None,
            shuffle=False,
            seed=None,
            capacity=1000,
            shared_name=None,
            name=None)

        images_batch, labels_batch, image_locations_batch = data_process.decodeRGB(
            input_queue, FLAGS.seq_length, FLAGS.size)

        images_batch = tf.to_float(images_batch)
        images_batch -= 128.0
        images_batch /= 128.0  # scale all pixel values to the range [-1, 1]
        images_batch = tf.reshape(images_batch, [-1, 96, 96, 3])
        labels_batch = tf.reshape(labels_batch, [-1, 2])

        if FLAGS.network == 'affwildnet_vggface':
            from affwildnet import vggface_gru as net
            network = net.VGGFace(FLAGS.batch_size, FLAGS.seq_length)
            network.setup(images_batch)
            prediction = network.get_output()

        num_batches = int(len(image_list) / FLAGS.batch_size)

        variables_to_restore = tf.global_variables()

        with tf.Session() as sess:
            init_fn = slim.assign_from_checkpoint_fn(
                FLAGS.pretrained_model_checkpoint_path,
                variables_to_restore,
                ignore_missing_vars=False)
            init_fn(sess)
            print('Loading model {}'.format(
                FLAGS.pretrained_model_checkpoint_path))

            tf.train.start_queue_runners(sess=sess)
            coord = tf.train.Coordinator()

            evaluated_predictions = []
            evaluated_labels = []
            images = []

            try:
                for _ in range(num_batches):
                    pr, l, imm = sess.run(
                        [prediction, labels_batch, image_locations_batch])
                    evaluated_predictions.append(pr)
                    evaluated_labels.append(l)
                    images.append(imm)

                    if coord.should_stop():
                        break
                coord.request_stop()
            except Exception as e:
                coord.request_stop(e)

            predictions = np.reshape(evaluated_predictions, (-1, 2))
            labels = np.reshape(evaluated_labels, (-1, 2))
            images = np.reshape(images, (-1))

            valence = sum(predictions[:, 0]) / len(predictions[:, 0])
            print('Valence : {}'.format(valence))
            arousal = sum(predictions[:, 1]) / len(predictions[:, 1])
            print('Arousal : {}'.format(arousal))

        return valence, arousal
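# Example invocation (the image path is hypothetical). predict() writes a
# single line into FLAGS.input_file, so FLAGS.batch_size and FLAGS.seq_length
# are assumed to be set consistently with a single-image input (e.g. both 1).
if __name__ == '__main__':
    valence, arousal = predict('/path/to/face_crop.jpg')
    print('valence: {:.3f}, arousal: {:.3f}'.format(valence, arousal))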