def train(data_folder):
    g = tf.Graph()
    with g.as_default():
        # Load dataset.
        frames, audio, ground_truth, _ = data_provider.get_split(
            data_folder, True, 'train', FLAGS.batch_size,
            seq_length=FLAGS.seq_length)

        # Define model graph.
        with slim.arg_scope([slim.batch_norm, slim.layers.dropout],
                            is_training=True):
            with slim.arg_scope(
                    slim.nets.resnet_utils.resnet_arg_scope(is_training=True)):
                prediction = models.get_model(FLAGS.model)(
                    frames, audio, hidden_units=FLAGS.hidden_units)

        for i, name in enumerate(['arousal', 'valence']):
            pred_single = tf.reshape(prediction[:, :, i], (-1,))
            gt_single = tf.reshape(ground_truth[:, :, i], (-1,))

            loss = losses.concordance_cc(pred_single, gt_single)
            tf.summary.scalar('losses/{} loss'.format(name), loss)

            mse = tf.reduce_mean(tf.square(pred_single - gt_single))
            tf.summary.scalar('losses/mse {} loss'.format(name), mse)

            tf.losses.add_loss(loss / 2.)

        total_loss = tf.losses.get_total_loss()
        tf.summary.scalar('losses/total loss', total_loss)

        optimizer = tf.train.AdamOptimizer(FLAGS.initial_learning_rate)

        init_fn = None
        with tf.Session(graph=g) as sess:
            if FLAGS.pretrained_model_checkpoint_path:
                # Helper that strips the leading scope prefix from a variable name.
                # Note it is not applied below, so variables are restored under
                # their current names.
                def name_in_checkpoint(var):
                    return var.op.name[12:]

                variables_to_restore = slim.get_model_variables()
                init_fn = slim.assign_from_checkpoint_fn(
                    FLAGS.pretrained_model_checkpoint_path,
                    variables_to_restore)

            train_op = slim.learning.create_train_op(total_loss,
                                                     optimizer,
                                                     summarize_gradients=True)

            logging.set_verbosity(1)
            slim.learning.train(train_op,
                                FLAGS.train_dir,
                                init_fn=init_fn,
                                save_summaries_secs=60,
                                save_interval_secs=300)
def train(data_folder):
    g = tf.Graph()
    with g.as_default():
        # Load dataset.
        frames, audio, ground_truth, _ = data_provider.get_split(
            data_folder, True, 'train', FLAGS.batch_size,
            seq_length=FLAGS.seq_length)

        # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/framework/python/ops/arg_scope.py
        # Define model graph.
        with slim.arg_scope([slim.batch_norm, slim.layers.dropout],
                            is_training=True):
            with slim.arg_scope(
                    slim.nets.resnet_utils.resnet_arg_scope(is_training=True)):
                prediction = models.recurrent_model(
                    models.audio_model(audio_frames=audio), hidden_units=256)

        for i, name in enumerate(['arousal', 'valence']):
            pred_single = tf.reshape(prediction[:, :, i], (-1,))
            gt_single = tf.reshape(ground_truth[:, :, i], (-1,))

            loss = losses.concordance_cc(pred_single, gt_single)
            tf.summary.scalar('losses/{} loss'.format(name), loss)

            mse = tf.reduce_mean(tf.square(pred_single - gt_single))
            tf.summary.scalar('losses/mse {} loss'.format(name), mse)

            slim.losses.add_loss(loss / 2.)

        total_loss = slim.losses.get_total_loss()
        tf.summary.scalar('losses/total loss', total_loss)

        optimizer = tf.train.AdamOptimizer(FLAGS.initial_learning_rate)

        init_fn = None
        with tf.Session(graph=g) as sess:
            if FLAGS.pretrained_model_checkpoint_path:
                # Need to specify which variables to restore (use scope of models).
                variables_to_restore = slim.get_variables()
                init_fn = slim.assign_from_checkpoint_fn(
                    FLAGS.pretrained_model_checkpoint_path,
                    variables_to_restore)

            train_op = slim.learning.create_train_op(total_loss,
                                                     optimizer,
                                                     summarize_gradients=True)

            logging.set_verbosity(1)
            slim.learning.train(train_op,
                                FLAGS.train_dir,
                                init_fn=init_fn,
                                save_summaries_secs=60,
                                save_interval_secs=300)
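# Both training functions above minimise losses.concordance_cc, which is not shown in this
# listing. The sketch below is an assumption of what such a loss typically computes: Lin's
# concordance correlation coefficient, returned as 1 - CCC so that lower is better. The
# repository's own losses.py may differ in details such as masking or weighting.
import tensorflow as tf


def concordance_cc(prediction, ground_truth):
    """Illustrative 1 - CCC loss between two 1-D tensors."""
    pred_mean, pred_var = tf.nn.moments(prediction, [0])
    gt_mean, gt_var = tf.nn.moments(ground_truth, [0])
    # Mean-centred covariance between predictions and targets.
    covariance = tf.reduce_mean((prediction - pred_mean) * (ground_truth - gt_mean))
    # Lin's concordance correlation coefficient.
    ccc = 2. * covariance / (pred_var + gt_var + tf.square(pred_mean - gt_mean))
    return 1. - ccc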
def evaluate(file2eval, model_path):
    g = tf.Graph()
    with g.as_default():
        total_nexamples = 0
        filename_queue = tf.FIFOQueue(capacity=1, dtypes=[tf.string])

        # Load dataset.
        audio_frames, labels, _ = get_split(filename_queue, False,
                                            FLAGS.portion, 1,
                                            seq_length=FLAGS.seq_length)

        # Define model graph.
        with slim.arg_scope([slim.layers.batch_norm, slim.layers.dropout],
                            is_training=False):
            predictions = models.get_model(FLAGS.model)(
                audio_frames, hidden_units=FLAGS.hidden_units)

        coord = tf.train.Coordinator()
        variables_to_restore = slim.get_variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        with tf.Session() as sess:
            saver.restore(sess, model_path)
            tf.train.start_queue_runners(sess=sess, coord=coord)

            evaluated_predictions = []
            evaluated_labels = []

            print('Evaluating file : {}'.format(file2eval))
            nexamples = _get_num_examples(file2eval)
            total_nexamples += nexamples

            num_batches = int(math.ceil(nexamples / float(FLAGS.seq_length)))

            # Queue the file for the reader; any leftover entries are drained below.
            sess.run(filename_queue.enqueue(file2eval))
            sess.run(filename_queue.enqueue(file2eval))

            for _ in range(num_batches):
                prediction_, label_ = sess.run([predictions, labels])
                evaluated_predictions.append(prediction_[0])
                evaluated_labels.append(label_[0])

            evaluated_predictions = np.vstack(evaluated_predictions)[:nexamples]
            evaluated_labels = np.vstack(evaluated_labels)[:nexamples]

            # Drain whatever is left in the queue and verify it is empty.
            for i in range(sess.run(filename_queue.size())):
                sess.run(filename_queue.dequeue())
            if sess.run(filename_queue.size()) != 0:
                raise ValueError('Queue not empty!')

            coord.request_stop()

    return evaluated_predictions, evaluated_labels
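# _get_num_examples above is referenced but not defined in this listing. A plausible sketch
# (an assumption, not necessarily the repository's actual helper) simply counts the
# serialized records in the TFRecord file using the TF1 record iterator:
import tensorflow as tf


def _get_num_examples(tf_file):
    """Count the examples stored in a single .tfrecords file."""
    return sum(1 for _ in tf.python_io.tf_record_iterator(tf_file))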
def train():
    tf.set_random_seed(1)
    g = tf.Graph()
    with g.as_default():
        # Load dataset.
        audio_frames, word_embeddings, ground_truth = get_split(
            FLAGS.dataset_dir, True, FLAGS.batch_size,
            seq_length=FLAGS.sequence_length)

        # Define model graph.
        with slim.arg_scope([slim.layers.batch_norm, slim.layers.dropout],
                            is_training=True):
            prediction = models.get_model(FLAGS.model)(
                audio_frames,
                emb=tf.cast(word_embeddings, tf.float32),
                hidden_units=FLAGS.hidden_units)

        optimizer = tf.compat.v1.train.AdamOptimizer(FLAGS.learning_rate,
                                                     beta1=0.9, beta2=0.99)

        count = 0
        for i, name in enumerate(['arousal', 'valence', 'liking']):
            count += 1

            pred_single = tf.reshape(prediction[:, :, i], (-1,))
            gt_single = tf.reshape(ground_truth[:, :, i], (-1,))

            loss = losses.concordance_cc(pred_single, gt_single)
            tf.summary.scalar('losses/{} loss'.format(name), loss)

            mse = tf.reduce_mean(tf.square(pred_single - gt_single))
            tf.summary.scalar('losses/mse {} loss'.format(name), mse)

            # Each target's loss is divided by the running count (1, 2, 3), so
            # arousal, valence and liking are weighted unequally in the total loss.
            tf.losses.add_loss(loss / count)

        # print(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
        total_loss = tf.losses.get_total_loss()
        tf.summary.scalar('losses/total loss', total_loss)

        with tf.Session(graph=g) as sess:
            train_op = slim.learning.create_train_op(total_loss,
                                                     optimizer,
                                                     summarize_gradients=True)

            logging.set_verbosity(1)
            slim.learning.train(train_op,
                                FLAGS.train_dir,
                                save_summaries_secs=60,
                                save_interval_secs=120)
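# The per-target losses above are combined through the TF1 losses collection: each call to
# tf.losses.add_loss registers a tensor, and tf.losses.get_total_loss() returns their sum
# (plus regularization losses by default). A condensed, self-contained illustration of
# that accumulation, using constant stand-ins for the per-target CCC losses:
import tensorflow as tf

a = tf.constant(0.3)        # stand-in for the arousal loss (count == 1)
b = tf.constant(0.6)        # stand-in for the valence loss (count == 2)
tf.losses.add_loss(a / 1.)
tf.losses.add_loss(b / 2.)
total = tf.losses.get_total_loss(add_regularization_losses=False)

with tf.Session() as sess:
    print(sess.run(total))  # 0.3 + 0.3 = 0.6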
def train(configuration):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = repr(configuration.GPU)

    ################################################################
    # Interpret configuration arguments.
    ################################################################
    are_test_labels_available = configuration.are_test_labels_available
    task_name = "task" + repr(configuration.task)

    id_to_partition, partition_to_id = get_partition(configuration.challenge_folder)
    partition_to_chunk = get_partition_to_chunk(partition_to_id,
                                                configuration.tf_records_folder)

    train_size = len(partition_to_chunk["train"])
    devel_size = len(partition_to_chunk["devel"])
    test_size = len(partition_to_chunk["test"])
    print(train_size, devel_size, test_size)

    train_steps_per_epoch = train_size // configuration.train_batch_size
    devel_steps_per_epoch = devel_size // configuration.devel_batch_size
    test_steps_per_epoch = test_size // configuration.test_batch_size
    print(train_steps_per_epoch, devel_steps_per_epoch, test_steps_per_epoch)

    tf_records_folder = configuration.tf_records_folder
    output_folder = configuration.output_folder
    make_dirs_safe(output_folder)

    if configuration.task == 1:
        targets = ["arousal", "valence"]
        number_of_targets = len(targets)
    elif configuration.task == 2:
        targets = ["arousal", "valence", "topic"]
        number_of_targets = 16
    elif configuration.task == 3:
        targets = ["arousal", "valence", "trustworthiness"]
        number_of_targets = len(targets)
    else:
        raise ValueError("Invalid task selection.")

    feature_names = [k for k, v in configuration.use_data.items() if v is True]

    method_string = "_" + repr(configuration.task) + "_" + \
        "_".join([k for k, v in configuration.use_data.items() if v is True]) + "_" + \
        configuration.model_type + "_" + \
        repr(configuration.hidden_units) + "_" + \
        repr(configuration.use_attention) + "_" + \
        repr(configuration.initial_learning_rate) + "_" + \
        repr(configuration.GPU)
    method_string = configuration.output_folder + "/" + method_string

    saver_paths = dict()
    for target in targets:
        saver_paths[target] = method_string + "_" + target + "_" + repr(configuration.GPU)

    ################################################################
    # Form computational graph.
    ################################################################
    g = tf.Graph()
    with g.as_default():
        with tf.Session() as sess:
            ############################################################
            # Get dataset iterators.
            ############################################################
            dataset_train = data_provider.get_split(
                tf_records_folder,
                is_training=True,
                task_name=task_name,
                split_name="train",
                are_test_labels_available=are_test_labels_available,
                id_to_partition=id_to_partition,
                feature_names=feature_names,
                batch_size=configuration.train_batch_size,
                seq_length=configuration.full_seq_length,
                buffer_size=(train_steps_per_epoch + 1) // 4)
            dataset_devel = data_provider.get_split(
                tf_records_folder,
                is_training=False,
                task_name=task_name,
                split_name="devel",
                are_test_labels_available=are_test_labels_available,
                id_to_partition=id_to_partition,
                feature_names=feature_names,
                batch_size=configuration.devel_batch_size,
                seq_length=configuration.full_seq_length,
                buffer_size=(devel_steps_per_epoch + 1) // 4)
            dataset_test = data_provider.get_split(
                tf_records_folder,
                is_training=False,
                task_name=task_name,
                split_name="test",
                are_test_labels_available=are_test_labels_available,
                id_to_partition=id_to_partition,
                feature_names=feature_names,
                batch_size=configuration.test_batch_size,
                seq_length=configuration.full_seq_length,
                buffer_size=(test_steps_per_epoch + 1) // 4)

            iterator_train = tf.data.Iterator.from_structure(dataset_train.output_types,
                                                             dataset_train.output_shapes)
            iterator_devel = tf.data.Iterator.from_structure(dataset_devel.output_types,
                                                             dataset_devel.output_shapes)
            iterator_test = tf.data.Iterator.from_structure(dataset_test.output_types,
                                                            dataset_test.output_shapes)

            next_element_train = iterator_train.get_next()
            next_element_devel = iterator_devel.get_next()
            next_element_test = iterator_test.get_next()

            init_op_train = iterator_train.make_initializer(dataset_train)
            init_op_devel = iterator_devel.make_initializer(dataset_devel)
            init_op_test = iterator_test.make_initializer(dataset_test)

            ############################################################
            # Define placeholders.
            ############################################################
            batch_size_tensor = tf.placeholder(tf.int32)
            sequence_length_tensor = tf.placeholder(tf.int32)

            support_train = tf.placeholder(tf.float32, (None, None, 1))
            if task_name == "task2":
                topic_train = tf.placeholder(tf.float32, (None, 10))
            if task_name == "task3":
                trustworthiness_train = tf.placeholder(tf.float32, (None, None, 1))

            if task_name in ["task1", "task3"]:
                arousal_train = tf.placeholder(tf.float32, (None, None, 1))
                valence_train = tf.placeholder(tf.float32, (None, None, 1))
            elif task_name == "task2":
                arousal_train = tf.placeholder(tf.float32, (None, 3))
                valence_train = tf.placeholder(tf.float32, (None, 3))
            else:
                raise ValueError

            step_id_train = tf.placeholder(tf.int32, (None, None, 1))
            chunk_id_train = tf.placeholder(tf.int32, (None, None, 1))
            recording_id_train = tf.placeholder(tf.int32, (None, None, 1))

            tf_placeholder_train_dict = dict()
            for feature_name in feature_names:
                tf_placeholder_train_dict[feature_name] = tf.placeholder(
                    tf.float32, (None, None, FEATURE_NUM[feature_name]))

            ############################################################
            # Define model graph and get model.
            ############################################################
            with tf.variable_scope("Model"):
                input_data_train = dict()
                for feature_name in feature_names:
                    if configuration.use_data[feature_name]:
                        input_data_train[feature_name] = tf_placeholder_train_dict[feature_name]

                if task_name in ["task1", "task3"]:
                    use_pooling = False
                elif task_name == "task2":
                    use_pooling = True
                else:
                    raise ValueError

                pred_train = core.get_model(input_data_dict=input_data_train,
                                            support=support_train,
                                            model_type=configuration.model_type,
                                            batch_size=batch_size_tensor,
                                            number_of_outputs=number_of_targets,
                                            orig_seq_length=sequence_length_tensor,
                                            hidden_units=configuration.hidden_units,
                                            use_attention=configuration.use_attention,
                                            use_pooling=use_pooling)

            ############################################################
            # Define loss function.
            ############################################################
            tensor_shape_train = [batch_size_tensor, sequence_length_tensor]
            flattened_size_train = tensor_shape_train[0] * tensor_shape_train[1]

            if task_name in ["task1", "task3"]:
                pred_arousal_train = pred_train[:, :, 0]
                pred_valence_train = pred_train[:, :, 1]
                if task_name == "task3":
                    pred_trustworthiness_train = pred_train[:, :, 2]

                single_pred_arousal_train = core.flatten_data(pred_arousal_train, flattened_size_train)
                single_pred_valence_train = core.flatten_data(pred_valence_train, flattened_size_train)
                if task_name == "task3":
                    single_pred_trustworthiness_train = core.flatten_data(pred_trustworthiness_train,
                                                                          flattened_size_train)

                single_true_support_train = core.flatten_data(support_train, flattened_size_train)
                single_true_arousal_train = core.flatten_data(arousal_train, flattened_size_train)
                single_true_valence_train = core.flatten_data(valence_train, flattened_size_train)
                if task_name == "task3":
                    single_true_trustworthiness_train = core.flatten_data(trustworthiness_train,
                                                                          flattened_size_train)
            elif task_name == "task2":
                pred_arousal_train = pred_train[:, 0:3]
                pred_valence_train = pred_train[:, 3:6]
                pred_topic_train = pred_train[:, 6:16]

                single_pred_arousal_train = pred_arousal_train
                single_pred_valence_train = pred_valence_train
                single_pred_topic_train = pred_topic_train

                single_true_support_train = core.flatten_data(support_train, flattened_size_train)
                single_true_arousal_train = arousal_train
                single_true_valence_train = valence_train
                single_true_topic_train = topic_train
            else:
                raise ValueError

            if task_name == "task1":
                loss = core.loss_function_task_1(pred_arousal=single_pred_arousal_train,
                                                 true_arousal=single_true_arousal_train,
                                                 pred_valence=single_pred_valence_train,
                                                 true_valence=single_true_valence_train,
                                                 support=single_true_support_train)
            elif task_name == "task2":
                loss = core.loss_function_task_2(pred_arousal=single_pred_arousal_train,
                                                 true_arousal=single_true_arousal_train,
                                                 pred_valence=single_pred_valence_train,
                                                 true_valence=single_true_valence_train,
                                                 pred_topic=single_pred_topic_train,
                                                 true_topic=single_true_topic_train,
                                                 support=single_true_support_train)
            elif task_name == "task3":
                loss = core.loss_function_task_3(pred_arousal=single_pred_arousal_train,
                                                 true_arousal=single_true_arousal_train,
                                                 pred_valence=single_pred_valence_train,
                                                 true_valence=single_true_valence_train,
                                                 pred_trustworthiness=single_pred_trustworthiness_train,
                                                 true_trustworthiness=single_true_trustworthiness_train,
                                                 support=single_true_support_train)
            else:
                raise NotImplementedError

            vars = tf.trainable_variables()
            model_vars = [v for v in vars if v.name.startswith("Model")]

            saver_dict = dict()
            for target in targets:
                saver_dict[target] = tf.train.Saver({v.name: v for v in model_vars})

            total_loss = tf.reduce_sum(loss)

            optimizer = tf.train.AdamOptimizer(configuration.initial_learning_rate)
            gradients, variables = zip(*optimizer.compute_gradients(loss))
            gradients, _ = tf.clip_by_global_norm(gradients, 1.0)
            optimizer = optimizer.apply_gradients(zip(gradients, variables))

            ############################################################
            # Initialize variables and perform experiment.
            ############################################################
            sess.run(tf.global_variables_initializer())

            ############################################################
            # Train base model.
            ############################################################
            current_patience = 0

            print("Start training base model.")
            print("Fresh base model.")
            for ee, epoch in enumerate(range(configuration.num_epochs)):
                print("EPOCH:", epoch + 1)

                input_feed_dict = {batch_size_tensor: "batch_size",
                                   sequence_length_tensor: "sequence_length",
                                   arousal_train: "arousal",
                                   valence_train: "valence",
                                   support_train: "support"}
                if task_name == "task2":
                    input_feed_dict[topic_train] = "topic"
                if task_name == "task3":
                    input_feed_dict[trustworthiness_train] = "trustworthiness"
                for feature_name in feature_names:
                    input_feed_dict[tf_placeholder_train_dict[feature_name]] = feature_name

                run_epoch = core.RunEpoch(sess=sess,
                                          partition="train",
                                          are_test_labels_available=are_test_labels_available,
                                          init_op=init_op_train,
                                          steps_per_epoch=train_steps_per_epoch,
                                          next_element=next_element_train,
                                          batch_size=configuration.train_batch_size,
                                          seq_length=configuration.full_seq_length,
                                          input_gaussian_noise=configuration.input_gaussian_noise,
                                          optimizer=optimizer,
                                          loss=total_loss,
                                          pred=pred_train,
                                          input_feed_dict=input_feed_dict,
                                          targets=targets,
                                          task_name=task_name)
                train_items, train_subject_to_id = run_epoch.run_epoch()

                if task_name == "task1":
                    train_measures = core.get_measures_task_1(train_items)
                elif task_name == "task2":
                    train_measures = core.get_measures_task_2(train_items)
                elif task_name == "task3":
                    train_measures = core.get_measures_task_3(train_items)
                else:
                    raise NotImplementedError

                print(method_string)
                if task_name == "task1":
                    print("Train CCC:", train_measures["arousal"]["ccc"],
                          train_measures["valence"]["ccc"], train_measures["ccc"])
                    print("Train CC:", train_measures["arousal"]["cc"],
                          train_measures["valence"]["cc"], train_measures["cc"])
                    print("Train MAE:", train_measures["arousal"]["mae"],
                          train_measures["valence"]["mae"], train_measures["mae"])
                elif task_name == "task2":
                    print("Train UAR:", train_measures["arousal"]["macro-recall"],
                          train_measures["valence"]["macro-recall"],
                          train_measures["topic"]["macro-recall"])
                    print("Train F1:", train_measures["arousal"]["micro-f1"],
                          train_measures["valence"]["micro-f1"],
                          train_measures["topic"]["micro-f1"])
                    print("Train TOT:", train_measures["arousal"]["score"],
                          train_measures["valence"]["score"],
                          train_measures["topic"]["score"])
                elif task_name == "task3":
                    print("Train CCC:", train_measures["trustworthiness"]["ccc"])
                    print("Train CC:", train_measures["trustworthiness"]["cc"])
                    print("Train MAE:", train_measures["trustworthiness"]["mae"])
                else:
                    raise NotImplementedError

                if ee == 0:
                    best_performance_dict = dict()
                    if task_name == "task1":
                        for target in targets:
                            best_performance_dict[target] = -1.0
                    elif task_name == "task2":
                        for target in targets:
                            best_performance_dict[target] = dict()
                            for measure_name in ["macro-recall", "micro-f1", "score"]:
                                best_performance_dict[target][measure_name] = -1.0
                    elif task_name == "task3":
                        best_performance_dict["trustworthiness"] = -1.0
                    else:
                        raise NotImplementedError

                if ee % configuration.val_every_n_epoch == 0:
                    input_feed_dict = {batch_size_tensor: "batch_size",
                                       sequence_length_tensor: "sequence_length",
                                       arousal_train: "arousal",
                                       valence_train: "valence",
                                       support_train: "support"}
                    if task_name == "task2":
                        input_feed_dict[topic_train] = "topic"
                    if task_name == "task3":
                        input_feed_dict[trustworthiness_train] = "trustworthiness"
                    for feature_name in feature_names:
                        input_feed_dict[tf_placeholder_train_dict[feature_name]] = feature_name

                    run_epoch = core.RunEpoch(sess=sess,
                                              partition="devel",
                                              are_test_labels_available=are_test_labels_available,
                                              init_op=init_op_devel,
                                              steps_per_epoch=devel_steps_per_epoch,
                                              next_element=next_element_devel,
                                              batch_size=configuration.devel_batch_size,
                                              seq_length=configuration.full_seq_length,
                                              input_gaussian_noise=configuration.input_gaussian_noise,
                                              optimizer=None,
                                              loss=None,
                                              pred=pred_train,
                                              input_feed_dict=input_feed_dict,
                                              targets=targets,
                                              task_name=task_name)
                    devel_items, devel_subject_to_id = run_epoch.run_epoch()

                    if task_name == "task1":
                        devel_measures = core.get_measures_task_1(devel_items)
                    elif task_name == "task2":
                        devel_measures = core.get_measures_task_2(devel_items)
                    elif task_name == "task3":
                        devel_measures = core.get_measures_task_3(devel_items)
                    else:
                        raise NotImplementedError

                    if task_name == "task1":
                        print("Devel CCC:", devel_measures["arousal"]["ccc"],
                              devel_measures["valence"]["ccc"], devel_measures["ccc"])
                        print("Devel CC:", devel_measures["arousal"]["cc"],
                              devel_measures["valence"]["cc"], devel_measures["cc"])
                        print("Devel MAE:", devel_measures["arousal"]["mae"],
                              devel_measures["valence"]["mae"], devel_measures["mae"])
                    elif task_name == "task2":
                        print("Devel UAR:", devel_measures["arousal"]["macro-recall"],
                              devel_measures["valence"]["macro-recall"],
                              devel_measures["topic"]["macro-recall"])
                        print("Devel F1:", devel_measures["arousal"]["micro-f1"],
                              devel_measures["valence"]["micro-f1"],
                              devel_measures["topic"]["micro-f1"])
                        print("Devel TOT:", devel_measures["arousal"]["score"],
                              devel_measures["valence"]["score"],
                              devel_measures["topic"]["score"])
                    elif task_name == "task3":
                        print("Devel CCC:", devel_measures["trustworthiness"]["ccc"])
                        print("Devel CC:", devel_measures["trustworthiness"]["cc"])
                        print("Devel MAE:", devel_measures["trustworthiness"]["mae"])
                    else:
                        raise NotImplementedError

                    noticed_improvement = False
                    if task_name == "task1":
                        for target in targets:
                            if best_performance_dict[target] < devel_measures[target]["ccc"]:
                                best_performance_dict[target] = devel_measures[target]["ccc"]
                                saver_dict[target].save(sess, saver_paths[target])
                                noticed_improvement = True
                    elif task_name == "task2":
                        for target in targets:
                            if best_performance_dict[target]["score"] < devel_measures[target]["score"]:
                                for measure_name in ["macro-recall", "micro-f1", "score"]:
                                    best_performance_dict[target][measure_name] = devel_measures[target][measure_name]
                                saver_dict[target].save(sess, saver_paths[target])
                                noticed_improvement = True
                    elif task_name == "task3":
                        if best_performance_dict["trustworthiness"] < devel_measures["trustworthiness"]["ccc"]:
                            best_performance_dict["trustworthiness"] = devel_measures["trustworthiness"]["ccc"]
                            saver_dict["trustworthiness"].save(sess, saver_paths["trustworthiness"])
                            noticed_improvement = True
                    else:
                        raise NotImplementedError

                    if noticed_improvement:
                        current_patience = 0
                    else:
                        current_patience += 1
                        if current_patience > configuration.patience:
                            break
                else:
                    pass

            test_measures_dict = dict()
            test_items_dict = dict()
            for target in targets:
                if task_name == "task3":
                    if target not in ["trustworthiness", ]:
                        continue
                saver_dict[target].restore(sess, saver_paths[target])

                input_feed_dict = {batch_size_tensor: "batch_size",
                                   sequence_length_tensor: "sequence_length",
                                   arousal_train: "arousal",
                                   valence_train: "valence",
                                   support_train: "support"}
                if task_name == "task2":
                    input_feed_dict[topic_train] = "topic"
                if task_name == "task3":
                    input_feed_dict[trustworthiness_train] = "trustworthiness"
                for feature_name in feature_names:
                    input_feed_dict[tf_placeholder_train_dict[feature_name]] = feature_name

                run_epoch = core.RunEpoch(sess=sess,
                                          partition="test",
                                          are_test_labels_available=are_test_labels_available,
                                          init_op=init_op_test,
                                          steps_per_epoch=test_steps_per_epoch,
                                          next_element=next_element_test,
                                          batch_size=configuration.test_batch_size,
                                          seq_length=configuration.full_seq_length,
                                          input_gaussian_noise=configuration.input_gaussian_noise,
                                          optimizer=None,
                                          loss=None,
                                          pred=pred_train,
                                          input_feed_dict=input_feed_dict,
                                          targets=targets,
                                          task_name=task_name)
                test_items, test_subject_to_id = run_epoch.run_epoch()

                if are_test_labels_available:
                    if task_name == "task1":
                        test_measures = core.get_measures_task_1(test_items)
                    elif task_name == "task2":
                        test_measures = core.get_measures_task_2(test_items)
                    elif task_name == "task3":
                        test_measures = core.get_measures_task_3(test_items)
                    else:
                        raise NotImplementedError
                    test_measures_dict[target] = test_measures
                test_items_dict[target] = test_items

            if task_name == "task1":
                print("Best devel CCC:",
                      best_performance_dict["arousal"],
                      best_performance_dict["valence"],
                      (best_performance_dict["arousal"] + best_performance_dict["valence"]) / 2.0)
                if are_test_labels_available:
                    print("Test CCC:",
                          test_measures_dict["arousal"]["arousal"]["ccc"],
                          test_measures_dict["valence"]["valence"]["ccc"],
                          (test_measures_dict["arousal"]["arousal"]["ccc"] +
                           test_measures_dict["valence"]["valence"]["ccc"]) / 2.0)
                    print("Test CC:",
                          test_measures_dict["arousal"]["arousal"]["cc"],
                          test_measures_dict["valence"]["valence"]["cc"],
                          (test_measures_dict["arousal"]["arousal"]["cc"] +
                           test_measures_dict["valence"]["valence"]["cc"]) / 2.0)
                    print("Test MAE:",
                          test_measures_dict["arousal"]["arousal"]["mae"],
                          test_measures_dict["valence"]["valence"]["mae"],
                          (test_measures_dict["arousal"]["arousal"]["mae"] +
                           test_measures_dict["valence"]["valence"]["mae"]) / 2.0)
            elif task_name == "task2":
                print("Best devel CCC:",
                      best_performance_dict["arousal"]["score"],
                      best_performance_dict["valence"]["score"],
                      (best_performance_dict["arousal"]["score"] +
                       best_performance_dict["valence"]["score"]) / 2.0,
                      best_performance_dict["topic"]["score"])
                if are_test_labels_available:
                    print("Test UAR:",
                          test_measures_dict["arousal"]["arousal"]["macro-recall"],
                          test_measures_dict["valence"]["valence"]["macro-recall"],
                          test_measures_dict["topic"]["topic"]["macro-recall"])
                    print("Test F1:",
                          test_measures_dict["arousal"]["arousal"]["micro-f1"],
                          test_measures_dict["valence"]["valence"]["micro-f1"],
                          test_measures_dict["topic"]["topic"]["micro-f1"])
                    print("Test TOT:",
                          0.66 * test_measures_dict["arousal"]["arousal"]["micro-f1"] +
                          0.34 * test_measures_dict["arousal"]["arousal"]["macro-recall"],
                          0.66 * test_measures_dict["valence"]["valence"]["micro-f1"] +
                          0.34 * test_measures_dict["valence"]["valence"]["macro-recall"],
                          0.66 * test_measures_dict["topic"]["topic"]["micro-f1"] +
                          0.34 * test_measures_dict["topic"]["topic"]["macro-recall"])
            elif task_name == "task3":
                print("Best devel CCC:", best_performance_dict["trustworthiness"])
                if are_test_labels_available:
                    print("Test CCC:", test_measures_dict["trustworthiness"]["trustworthiness"]["ccc"])
                    print("Test CC:", test_measures_dict["trustworthiness"]["trustworthiness"]["cc"])
                    print("Test MAE:", test_measures_dict["trustworthiness"]["trustworthiness"]["mae"])
            else:
                raise NotImplementedError

            if task_name == "task1":
                results = dict()
                results["method_string"] = method_string
                results["arousal"] = dict()
                results["valence"] = dict()

                results["arousal"]["best_devel_ccc"] = best_performance_dict["arousal"]
                results["valence"]["best_devel_ccc"] = best_performance_dict["valence"]

                if are_test_labels_available:
                    results["arousal"]["test_ccc"] = test_measures_dict["arousal"]["arousal"]["ccc"]
                    results["valence"]["test_ccc"] = test_measures_dict["valence"]["valence"]["ccc"]
                    results["arousal"]["test_cc"] = test_measures_dict["arousal"]["arousal"]["cc"]
                    results["valence"]["test_cc"] = test_measures_dict["valence"]["valence"]["cc"]
                    results["arousal"]["test_mae"] = test_measures_dict["arousal"]["arousal"]["mae"]
                    results["valence"]["test_mae"] = test_measures_dict["valence"]["valence"]["mae"]

                    results["arousal"]["test_true"] = test_items_dict["arousal"].arousal.true
                    results["valence"]["test_true"] = test_items_dict["valence"].valence.true
                    results["arousal"]["test_pred"] = test_items_dict["arousal"].arousal.pred
                    results["valence"]["test_pred"] = test_items_dict["valence"].valence.pred

                    print("Saving test predictions at:", method_string)
                    np.save(method_string + "/arousal_test_pred.npy",
                            test_items_dict["arousal"].arousal.pred)
                    np.save(method_string + "/valence_test_pred.npy",
                            test_items_dict["valence"].valence.pred)
            elif task_name == "task2":
                results = dict()
                results["method_string"] = method_string
                results["arousal"] = dict()
                results["valence"] = dict()
                results["emotion"] = dict()
                results["topic"] = dict()

                results["arousal"]["best_devel_macro_recall"] = best_performance_dict["arousal"]["macro-recall"]
                results["arousal"]["best_devel_micro_f1"] = best_performance_dict["arousal"]["micro-f1"]
                results["arousal"]["best_devel_score"] = best_performance_dict["arousal"]["score"]
                results["valence"]["best_devel_macro_recall"] = best_performance_dict["valence"]["macro-recall"]
                results["valence"]["best_devel_micro_f1"] = best_performance_dict["valence"]["micro-f1"]
                results["valence"]["best_devel_score"] = best_performance_dict["valence"]["score"]
                results["emotion"]["best_devel_macro_recall"] = (best_performance_dict["arousal"]["macro-recall"] +
                                                                 best_performance_dict["valence"]["macro-recall"]) / 2.0
                results["emotion"]["best_devel_micro_f1"] = (best_performance_dict["arousal"]["micro-f1"] +
                                                             best_performance_dict["valence"]["micro-f1"]) / 2.0
                results["emotion"]["best_devel_score"] = (best_performance_dict["arousal"]["score"] +
                                                          best_performance_dict["valence"]["score"]) / 2.0
                results["topic"]["best_devel_macro_recall"] = best_performance_dict["topic"]["macro-recall"]
                results["topic"]["best_devel_micro_f1"] = best_performance_dict["topic"]["micro-f1"]
                results["topic"]["best_devel_score"] = best_performance_dict["topic"]["score"]

                if are_test_labels_available:
                    results["arousal"]["test_macro_recall"] = test_measures_dict["arousal"]["arousal"]["macro-recall"]
                    results["valence"]["test_macro_recall"] = test_measures_dict["valence"]["valence"]["macro-recall"]
                    results["emotion"]["test_macro_recall"] = (test_measures_dict["arousal"]["arousal"]["macro-recall"] +
                                                               test_measures_dict["valence"]["valence"]["macro-recall"]) / 2.0
                    results["topic"]["test_macro_recall"] = test_measures_dict["valence"]["valence"]["macro-recall"]

                    results["arousal"]["test_micro_f1"] = test_measures_dict["arousal"]["arousal"]["micro-f1"]
                    results["valence"]["test_micro_f1"] = test_measures_dict["valence"]["valence"]["micro-f1"]
                    results["emotion"]["test_micro_f1"] = (test_measures_dict["arousal"]["arousal"]["micro-f1"] +
                                                           test_measures_dict["valence"]["valence"]["micro-f1"]) / 2.0
                    results["topic"]["test_micro_f1"] = test_measures_dict["valence"]["valence"]["micro-f1"]

                    results["arousal"]["test_score"] = 0.66 * results["arousal"]["test_micro_f1"] + \
                        0.34 * results["arousal"]["test_macro_recall"]
                    results["valence"]["test_score"] = 0.66 * results["valence"]["test_micro_f1"] + \
                        0.34 * results["valence"]["test_macro_recall"]
                    results["emotion"]["test_score"] = (results["arousal"]["test_score"] +
                                                        results["valence"]["test_score"]) / 2.0
                    results["topic"]["test_score"] = 0.66 * results["topic"]["test_micro_f1"] + \
                        0.34 * results["topic"]["test_macro_recall"]

                    results["arousal"]["test_true"] = test_items_dict["arousal"].arousal.true
                    results["valence"]["test_true"] = test_items_dict["valence"].valence.true
                    results["topic"]["test_true"] = test_items_dict["topic"].topic.true
                    results["arousal"]["test_pred"] = test_items_dict["arousal"].arousal.pred
                    results["valence"]["test_pred"] = test_items_dict["valence"].valence.pred
                    results["topic"]["test_pred"] = test_items_dict["topic"].topic.pred

                    print("Saving test predictions at:", method_string)
                    np.save(method_string + "/arousal_test_pred.npy",
                            test_items_dict["arousal"].arousal.pred)
                    np.save(method_string + "/valence_test_pred.npy",
                            test_items_dict["valence"].valence.pred)
                    np.save(method_string + "/topic_test_pred.npy",
                            test_items_dict["topic"].topic.pred)
            elif task_name == "task3":
                results = dict()
                results["method_string"] = method_string
                results["trustworthiness"] = dict()

                results["trustworthiness"]["best_devel_ccc"] = best_performance_dict["trustworthiness"]

                if are_test_labels_available:
                    results["trustworthiness"]["test_ccc"] = test_measures_dict["trustworthiness"]["trustworthiness"]["ccc"]
                    results["trustworthiness"]["test_cc"] = test_measures_dict["trustworthiness"]["trustworthiness"]["cc"]
                    results["trustworthiness"]["test_mae"] = test_measures_dict["trustworthiness"]["trustworthiness"]["mae"]

                    results["trustworthiness"]["test_true"] = test_items_dict["trustworthiness"].arousal.true
                    results["trustworthiness"]["test_pred"] = test_items_dict["trustworthiness"].arousal.pred

                    print("Saving test predictions at:", method_string)
                    np.save(method_string + "/trustworthiness_test_pred.npy",
                            test_items_dict["trustworthiness"].trustworthiness.pred)
            else:
                raise NotImplementedError

            return results
import tensorflow as tf

import config
import numpy as np
import data_provider
import losses
import models

data_folder = config.TFRECORDS_SAVE_PATH

frames, audio, ground_truth, ids = data_provider.get_split(
    data_folder, True, 'train', 2, seq_length=2, debugging=True)

'''
path = config.TFRECORDS_SAVE_PATH + "/tf_records/test/16.tfrecords"
record_iterator = tf.python_io.tf_record_iterator(path=path)

for string_record in record_iterator:
    example = tf.train.Example()
    example.ParseFromString(string_record)

    sample_id = int(example.features.feature['sample_id']
                    .int64_list
                    .value[0])

    subject_id = int(example.features.feature['subject_id']
                     .int64_list
                     .value[0])
'''
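# The commented-out record inspection above is truncated in the original. To materialise a
# debugging batch from the queue-backed tensors returned by data_provider.get_split, a
# session and queue runners are still needed; a minimal sketch under that assumption:
with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # Pull a single batch and print its shapes.
    frames_np, audio_np, labels_np = sess.run([frames, audio, ground_truth])
    print(frames_np.shape, audio_np.shape, labels_np.shape)

    coord.request_stop()
    coord.join(threads)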
def evaluate(data_folder):
    g = tf.Graph()
    with g.as_default():
        # Load dataset.
        frames, audio, ground_truth, _ = data_provider.get_split(
            data_folder, False, FLAGS.portion, FLAGS.batch_size,
            FLAGS.seq_length)

        # Define model graph.
        with slim.arg_scope([slim.batch_norm, slim.layers.dropout],
                            is_training=False):
            with slim.arg_scope(
                    slim.nets.resnet_utils.resnet_arg_scope(is_training=False)):
                prediction = models.get_model(FLAGS.model)(
                    frames, audio, hidden_units=FLAGS.hidden_units)

        # Computing MSE and Concordance values, and adding them to summary.
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'eval/mse_arousal': slim.metrics.streaming_mean_squared_error(
                prediction[:, :, 0], ground_truth[:, :, 0]),
            'eval/mse_valence': slim.metrics.streaming_mean_squared_error(
                prediction[:, :, 1], ground_truth[:, :, 1]),
        })

        summary_ops = []
        conc_total = 0
        mse_total = 0
        for i, name in enumerate(['arousal', 'valence']):
            with tf.name_scope(name) as scope:
                concordance_cc2, values, updates = metrics.concordance_cc2(
                    tf.reshape(prediction[:, :, i], [-1]),
                    tf.reshape(ground_truth[:, :, i], [-1]))

                for n, v in updates.items():
                    names_to_updates[n + '/' + name] = v

                op = tf.summary.scalar('eval/concordance_' + name, concordance_cc2)
                op = tf.Print(op, [concordance_cc2], 'eval/concordance_' + name)
                summary_ops.append(op)

                mse_eval = 'eval/mse_' + name
                op = tf.summary.scalar(mse_eval, names_to_values[mse_eval])
                op = tf.Print(op, [names_to_values[mse_eval]], mse_eval)
                summary_ops.append(op)

                mse_total += names_to_values[mse_eval]
                conc_total += concordance_cc2

        conc_total = conc_total / 2
        mse_total = mse_total / 2

        op = tf.summary.scalar('eval/concordance_total', conc_total)
        op = tf.Print(op, [conc_total], 'eval/concordance_total')
        summary_ops.append(op)

        op = tf.summary.scalar('eval/mse_total', mse_total)
        op = tf.Print(op, [mse_total], 'eval/mse_total')
        summary_ops.append(op)

        num_examples = FLAGS.num_examples
        num_batches = int(num_examples / (FLAGS.batch_size * FLAGS.seq_length))
        logging.set_verbosity(1)

        # Setup the global step.
        eval_interval_secs = FLAGS.eval_interval_secs  # How often to run the evaluation.
        slim.evaluation.evaluation_loop(
            '',
            FLAGS.checkpoint_dir,
            FLAGS.log_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            summary_op=tf.summary.merge(summary_ops),
            eval_interval_secs=eval_interval_secs)
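# The loop above relies on metrics.concordance_cc2, which returns a value tensor plus
# dictionaries of value and update ops. A hedged sketch of such a streaming CCC, built from
# tf.metrics.mean accumulators (the repository's own implementation may differ):
def streaming_concordance_cc(predictions, labels):
    """Illustrative streaming CCC returning (value, value_ops, update_ops)."""
    mean_p, up_p = tf.metrics.mean(predictions)
    mean_l, up_l = tf.metrics.mean(labels)
    mean_p2, up_p2 = tf.metrics.mean(tf.square(predictions))
    mean_l2, up_l2 = tf.metrics.mean(tf.square(labels))
    mean_pl, up_pl = tf.metrics.mean(predictions * labels)

    # Derive variances and covariance from the streaming means.
    var_p = mean_p2 - tf.square(mean_p)
    var_l = mean_l2 - tf.square(mean_l)
    covariance = mean_pl - mean_p * mean_l
    ccc = 2. * covariance / (var_p + var_l + tf.square(mean_p - mean_l))

    values = {'mean_pred': mean_p, 'mean_label': mean_l}
    updates = {'mean_pred': up_p, 'mean_label': up_l, 'mean_pred_sq': up_p2,
               'mean_label_sq': up_l2, 'mean_prod': up_pl}
    return ccc, values, updates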
def evaluate(data_folder):
    """Evaluates the model once. Prints in terminal the Accuracy and the UAR of the audio model.

    Args:
        data_folder: The folder that contains the test data.
    """
    g = tf.Graph()
    with g.as_default():
        # Load dataset.
        audio, labels, num_examples = data_provider.get_split(
            data_folder, FLAGS.portion, FLAGS.batch_size)

        # Define model graph.
        with slim.arg_scope([slim.batch_norm, slim.layers.dropout],
                            is_training=False):
            predictions = models.get_model(FLAGS.model)(audio)

        coord = tf.train.Coordinator()
        variables_to_restore = slim.get_variables_to_restore()

        num_batches = math.ceil(num_examples / float(FLAGS.batch_size))

        evaluated_predictions = []
        evaluated_labels = []

        saver = tf.train.Saver(variables_to_restore)
        model_path = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        print('Loading model from {}'.format(model_path))

        with tf.Session() as sess:
            saver.restore(sess, model_path)
            tf.train.start_queue_runners(sess=sess)

            try:
                for _ in print_progress(range(num_batches), prefix="Batch"):
                    pr, l = sess.run([predictions, labels])
                    evaluated_predictions.append(pr)
                    evaluated_labels.append(l)

                    if coord.should_stop():
                        break

                coord.request_stop()
            except Exception as e:
                coord.request_stop(e)

        predictions = np.reshape(evaluated_predictions, (-1, 2))
        labels = np.reshape(evaluated_labels, (-1, 2))

        pred_argmax = np.argmax(predictions, axis=1)
        lab_argmax = np.argmax(labels, axis=1)
        not_pred_argmax = np.argmin(predictions, axis=1)
        not_lab_argmax = np.argmin(labels, axis=1)

        correct = (pred_argmax == lab_argmax).mean()
        print('Accuracy: {}'.format(correct))

        recall_1 = sm.recall_score(lab_argmax, pred_argmax)
        recall_2 = sm.recall_score(not_lab_argmax, not_pred_argmax)
        print('Function recall 1: {}'.format(recall_1))
        print('Function recall 2: {}'.format(recall_2))

        uar = (recall_1 + recall_2) / 2
        print('UAR: {}'.format(uar))