def evaluate_baselines(resource_manager, evaluations, experiment):
    def extract_data(lexicon, annotations):
        lemma_poses = []
        frame_ids = []
        for annotation in annotations:
            lemma_poses.append(annotation.lemma_pos)
            frame_ids.append(lexicon.get_id(annotation.frame_name))
        return lemma_poses, frame_ids

    corpus_train = experiment.corpus_train
    corpus_val = experiment.corpus_validation
    corpus_test = experiment.corpus_test
    lexicon_name = experiment.lexicon_name

    train_annotations_file = resource_manager.get_frame_annotations_file(corpus_train)
    val_annotations_file = resource_manager.get_frame_annotations_file(corpus_val)
    test_annotations_file = resource_manager.get_frame_annotations_file(corpus_test)

    frame_to_id_mapping = build_frame_to_id_mapping(train_annotations_file,
                                                    val_annotations_file,
                                                    test_annotations_file)
    lexicon = load_lexicon(resource_manager, lexicon_name, frame_to_id_mapping)

    train_annotations = load_annotations(train_annotations_file)
    test_annotations = load_annotations(test_annotations_file)

    lemma_poses_train, frame_ids_train = extract_data(lexicon, train_annotations)
    lemma_poses_test, frame_ids_test = extract_data(lexicon, test_annotations)

    baselines = [
        DataMajorityBaseline(lexicon),
        DataMajorityLexiconBaseline(lexicon),
        LexiconBaseline(lexicon)
    ]

    for baseline in baselines:
        start = timer()
        baseline.fit(lemma_poses_train, frame_ids_train)
        end = timer()

        predictions = baseline.predict(lemma_poses_test)
        evaluations.add_evaluation(type(baseline).__name__, '', corpus_train, corpus_test,
                                   lexicon, '', 'baseline', predictions, frame_ids_test,
                                   lemma_poses_test, baseline, end - start)
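
# A minimal usage sketch for the baselines above (hypothetical setup; the concrete
# ResourceManager, EvaluationManager and Experiment objects and the corpus/lexicon
# names are assumptions, not part of this module):
#
#     resource_manager = ResourceManager()
#     evaluations = EvaluationManager()
#     experiment = Experiment(corpus_train='train', corpus_validation='dev',
#                             corpus_test='test', lexicon_name='framenet',
#                             embeddings=None)
#     evaluate_baselines(resource_manager, evaluations, experiment)
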
def evaluate_model(resource_manager, evaluation_manager, model_name, model_type,
                   corpus_name, lexicon_name, embeddings):
    model, frame_to_id_mapping, features, metadata = load_trained_model(model_name, model_type)
    assert metadata.embedding_name == embeddings.vsm_name, \
        'Cannot use two different embeddings for train and eval'

    lexicon = load_lexicon(resource_manager, lexicon_name, frame_to_id_mapping)
    dataset, X, Y = load_dataset(resource_manager, model_type, corpus_name, embeddings,
                                 lexicon, features)

    predictions = model.predict(X)
    predictions_with_lexicon = predict_with_lexicon(model, X, Y, dataset.lemma_pos, lexicon)

    evaluation = evaluation_manager.add_evaluation(
        model_name, features, metadata.corpus_train, corpus_name, lexicon,
        embeddings.vsm_name, True, Y, predictions, dataset.lemma_pos, None, 0)
    evaluation_with_lexicon = evaluation_manager.add_evaluation(
        model_name, features, metadata.corpus_train, corpus_name, lexicon,
        embeddings.vsm_name, False, Y, predictions_with_lexicon, dataset.lemma_pos, None, 0)

    return evaluation, evaluation_with_lexicon
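
# Usage sketch (assumes a trained model saved under `model_name` and an embeddings
# object exposing `vsm_name`; all concrete names below are illustrative only):
#
#     evaluation, evaluation_with_lexicon = evaluate_model(
#         resource_manager, evaluation_manager, model_name='mlp-2x512',
#         model_type='mlp', corpus_name='test', lexicon_name='framenet',
#         embeddings=word_embeddings)
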
def evaluate_mlp(resource_manager, evaluations, experiment, mlp_config, features,
                 runs=1, name=None):
    logging.info('Running experiment: [%s]', experiment)

    corpus_train = experiment.corpus_train
    corpus_val = experiment.corpus_validation
    corpus_test = experiment.corpus_test
    lexicon_name = experiment.lexicon_name

    if isinstance(experiment.embeddings, dict):
        # Embeddings are given as a dict that may combine token, synset and imagined
        # embeddings; concatenate their VSM names for the evaluation records.
        dict_embeddings = experiment.embeddings
        embeddings_vsm_name = ""
        if "embeddings" in dict_embeddings:
            print("Info: Token embeddings obtained via dict_embeddings['embeddings']")
            embeddings_vsm_name += dict_embeddings["embeddings"].vsm_name
        if "synset_embeddings" in dict_embeddings:
            print("Info: Synset embeddings obtained via dict_embeddings['synset_embeddings']")
            embeddings_vsm_name += dict_embeddings["synset_embeddings"].vsm_name
        if "imagined_embeddings" in dict_embeddings:
            print("Info: Imagined embeddings obtained via dict_embeddings['imagined_embeddings']")
            embeddings_vsm_name += dict_embeddings["imagined_embeddings"].vsm_name
    else:
        # Embeddings are given directly as one textual embedding instance;
        # wrap it in a dict so the code below can treat both cases uniformly.
        embeddings = experiment.embeddings
        embeddings_vsm_name = embeddings.vsm_name
        dict_embeddings = {"embeddings": embeddings}

    train_annotations_file = resource_manager.get_frame_annotations_file(corpus_train)
    val_annotations_file = resource_manager.get_frame_annotations_file(corpus_val)
    test_annotations_file = resource_manager.get_frame_annotations_file(corpus_test)

    frame_to_id_mapping = build_frame_to_id_mapping(train_annotations_file,
                                                    val_annotations_file,
                                                    test_annotations_file)
    lexicon = load_lexicon(resource_manager, lexicon_name, frame_to_id_mapping)

    dataset_train, x_train, y_train = load_dataset_mlp(resource_manager, corpus_train,
                                                       dict_embeddings, lexicon, features)
    dataset_val, x_val, y_val = load_dataset_mlp(resource_manager, corpus_val,
                                                 dict_embeddings, lexicon, features)
    dataset_test, x_test, y_test = load_dataset_mlp(resource_manager, corpus_test,
                                                    dict_embeddings, lexicon, features)

    mlp_name = mlp_layers_to_name(mlp_config)
    logging.info('Evaluating [%s] with features [%s] and experiment [%s]',
                 mlp_name, features, experiment)

    for run in range(runs):
        logging.info('Run %d/%d', run + 1, runs)

        # Build the model: collect the input dimension of every embedding type that
        # is present; with token embeddings only, pass the dimension directly.
        dict_dims = {}
        if "embeddings" in dict_embeddings:
            dict_dims["embeddings_dim"] = dataset_train.dim
        if "synset_embeddings" in dict_embeddings:
            dict_dims["synset_embeddings_dim"] = dataset_train.synset_dim
            embeddings_dims = dict_dims
        if "imagined_embeddings" in dict_embeddings:
            dict_dims["imagined_embeddings_dim"] = dataset_train.imagined_dim
            embeddings_dims = dict_dims
        if "synset_embeddings" not in dict_embeddings and "imagined_embeddings" not in dict_embeddings:
            # unimodal
            embeddings_dims = dict_dims["embeddings_dim"]

        model = build_mlp_from_config(mlp_config, embeddings_dims,
                                      len(frame_to_id_mapping), features)

        start = timer()
        predictions, predictions_with_lexicon, baseline = evaluate_on_train_test_split(
            model, lexicon, dataset_train, x_train, y_train, x_val, y_val,
            dataset_test, x_test, y_test)
        end = timer()

        evaluations.add_evaluation(mlp_name, features, corpus_train, corpus_test,
                                   lexicon, embeddings_vsm_name, True, y_test, predictions,
                                   dataset_test.lemma_pos, baseline, end - start)
        evaluations.add_evaluation(mlp_name, features, corpus_train, corpus_test,
                                   lexicon, embeddings_vsm_name, False, y_test,
                                   predictions_with_lexicon, dataset_test.lemma_pos,
                                   baseline, end - start)

        if name:
            save_model(resource_manager, model, name, run, experiment, lexicon, features)
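
# Usage sketch for a multimodal run (illustrative; only the dict keys 'embeddings',
# 'synset_embeddings' and 'imagined_embeddings' are recognised by evaluate_mlp,
# every other name here is an assumption):
#
#     multimodal_embeddings = {'embeddings': word_embeddings,
#                              'imagined_embeddings': imagined_embeddings}
#     experiment = Experiment(corpus_train='train', corpus_validation='dev',
#                             corpus_test='test', lexicon_name='framenet',
#                             embeddings=multimodal_embeddings)
#     evaluate_mlp(resource_manager, evaluations, experiment, mlp_config,
#                  features, runs=3, name='mlp-multimodal')
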
def build_confusion_matrix(model_name, model_type, corpus_test, lexicon_name, embeddings,
                           file_name):
    def save_confusion_matrix(cnf_matrix, lexicon, file_name, list_labels):
        rows, columns = cnf_matrix.shape
        assert rows == columns

        n = len(list_labels)
        data = [['' for i in range(n + 1)] for j in range(n + 1)]

        # First row and first column hold the frame names of the labels in the matrix
        for idx in range(n):
            label = lexicon.get_frame(list_labels[idx])
            data[0][idx + 1] = label
            data[idx + 1][0] = label

        for row in range(rows):
            for col in range(columns):
                e = cnf_matrix[row][col]
                if e != 0:
                    data[row + 1][col + 1] = str(e)

        # Write data as a tab-separated file
        with open(resource_manager.get_statistics(file_name), 'w') as f:
            for idx in range(n + 1):
                entry = '\t'.join(data[idx])
                f.write(entry)
                f.write('\n')

    model, frame_to_id_mapping, features, metadata = load_trained_model(model_name, model_type)

    if isinstance(embeddings, dict):
        # Embeddings are given as a dict; concatenate the VSM names of all
        # embedding types that are present.
        dict_embeddings = embeddings
        embeddings_vsm_name = ""
        if "embeddings" in dict_embeddings:
            embeddings_vsm_name += dict_embeddings["embeddings"].vsm_name
        if "synset_embeddings" in dict_embeddings:
            embeddings_vsm_name += dict_embeddings["synset_embeddings"].vsm_name
        if "imagined_embeddings" in dict_embeddings:
            embeddings_vsm_name += dict_embeddings["imagined_embeddings"].vsm_name
    else:
        # Embeddings are given directly as one textual embedding instance;
        # wrap it in a dict for uniform handling.
        dict_embeddings = {"embeddings": embeddings}
        embeddings_vsm_name = embeddings.vsm_name

    assert metadata.embedding_name == embeddings_vsm_name, \
        'Cannot use two different embeddings for train and eval'

    corpus_train = metadata.corpus_train
    lexicon = load_lexicon(resource_manager, lexicon_name, frame_to_id_mapping)
    dataset_test, x_test, y_test = load_dataset(resource_manager, model_type, corpus_test,
                                                embeddings, lexicon, features)

    y_pred = model.predict(x_test)
    y_pred_lex = predict_with_lexicon(model, x_test, y_test, dataset_test.lemma_pos, lexicon)

    labels = [lexicon.get_frame(i) for i in range(lexicon.get_number_of_labels())]

    y_true = remove_onehot(y_test)
    y_pred = remove_onehot(y_pred)
    y_pred_lex = remove_onehot(y_pred_lex)

    # Collect the label ids that actually occur as gold or predicted labels, and
    # print the predictions for the "Statement" frame for manual inspection.
    j = 0
    labels_pred = []
    labels_pred_lex = []
    for y_true_i, y_pred_i, y_pred_lex_i in zip(y_true, y_pred, y_pred_lex):
        if lexicon.get_frame(y_true_i) == "Statement":
            j += 1
            print(j, lexicon.get_frame(y_true_i), lexicon.get_frame(y_pred_i),
                  lexicon.get_frame(y_pred_lex_i))
        labels_pred.append(y_true_i)
        labels_pred.append(y_pred_i)
        labels_pred_lex.append(y_true_i)
        labels_pred_lex.append(y_pred_lex_i)

    list_labels_pred = list(set(labels_pred))
    labels_pred = np.asarray(list_labels_pred)
    list_labels_pred_lex = list(set(labels_pred_lex))
    labels_pred_lex = np.asarray(list_labels_pred_lex)

    confusion_matrix_nolex = confusion_matrix(y_true, y_pred, labels=labels_pred)
    confusion_matrix_lex = confusion_matrix(y_true, y_pred_lex, labels=labels_pred_lex)

    save_confusion_matrix(confusion_matrix_nolex, lexicon,
                          '{0}-confusion-nolex.csv'.format(file_name), list_labels_pred)
    save_confusion_matrix(confusion_matrix_lex, lexicon,
                          '{0}-confusion-lex.csv'.format(file_name), list_labels_pred_lex)
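
# Usage sketch (illustrative arguments; writes two tab-separated matrices named
# '<file_name>-confusion-nolex.csv' and '<file_name>-confusion-lex.csv' through
# resource_manager.get_statistics):
#
#     build_confusion_matrix(model_name='mlp-2x512', model_type='mlp',
#                            corpus_test='test', lexicon_name='framenet',
#                            embeddings=word_embeddings,
#                            file_name='mlp-2x512-test')
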
def analyse_errors(model_name, model_type, corpus_test, lexicon_name, embeddings,
                   corpus_traindev=None):
    Entry = namedtuple('Entry', ['name', 'correct', 'unseen_at_training',
                                 'seen_only_with_different_label', 'normal_error',
                                 'correct_lex', 'unseen_at_training_lex',
                                 'seen_only_with_different_label_lex', 'normal_error_lex',
                                 'wrong_by_lexicon'])

    model, frame_to_id_mapping, features, metadata = load_trained_model(model_name, model_type)
    lexicon = load_lexicon(resource_manager, lexicon_name, frame_to_id_mapping)

    if isinstance(embeddings, dict):
        # Embeddings are given as a dict; concatenate the VSM names of all embedding
        # types that are present and collect the (lemma, POS) pairs per frame from
        # the train+dev and test datasets.
        dict_embeddings = embeddings
        embeddings_vsm_name = ""
        if "embeddings" in dict_embeddings:
            embeddings_vsm_name += dict_embeddings["embeddings"].vsm_name
        if "synset_embeddings" in dict_embeddings:
            embeddings_vsm_name += dict_embeddings["synset_embeddings"].vsm_name
        if "imagined_embeddings" in dict_embeddings:
            embeddings_vsm_name += dict_embeddings["imagined_embeddings"].vsm_name

        dataset_traindev, x_traindev, y_traindev = load_dataset(resource_manager, model_type,
                                                                corpus_traindev, embeddings,
                                                                lexicon, features)
        dataset_test, x_test, y_test = load_dataset(resource_manager, model_type, corpus_test,
                                                    embeddings, lexicon, features)

        train_annotations = defaultdict(set)
        test_annotations = defaultdict(set)
        test_labels = []

        y_traindev = remove_onehot(y_traindev)
        for y_traindev_i, lemma_pos in zip(y_traindev, dataset_traindev.lemma_pos):
            frame_name = lexicon.get_frame(y_traindev_i)
            train_annotations[frame_name].add(lemma_pos)

        y_test = remove_onehot(y_test)
        for y_test_i, lemma_pos in zip(y_test, dataset_test.lemma_pos):
            frame_name = lexicon.get_frame(y_test_i)
            test_annotations[frame_name].add(lemma_pos)
            test_labels.append(lemma_pos)
    else:
        # Embeddings are given directly as one textual embedding instance; wrap it in
        # a dict and read the (lemma, POS) pairs per frame from the annotation files.
        dict_embeddings = {"embeddings": embeddings}
        embeddings_vsm_name = embeddings.vsm_name

        dataset_test, x_test, y_test = load_dataset(resource_manager, model_type, corpus_test,
                                                    embeddings, lexicon, features)

        corpus_train = metadata.corpus_train
        train_annotations_file = resource_manager.get_frame_annotations_file(corpus_train)
        test_annotations_file = resource_manager.get_frame_annotations_file(corpus_test)

        train_annotations = defaultdict(set)
        test_annotations = defaultdict(set)
        test_labels = []
        for annotation in load_annotations(train_annotations_file):
            train_annotations[annotation.frame_name].add(annotation.lemma_pos)
        for annotation in load_annotations(test_annotations_file):
            test_annotations[annotation.frame_name].add(annotation.lemma_pos)
            test_labels.append(annotation.lemma_pos)

    assert metadata.embedding_name == embeddings_vsm_name, \
        'Cannot use two different embeddings for train and eval'

    y_pred = model.predict(x_test)
    y_pred_lex = predict_with_lexicon(model, x_test, y_test, dataset_test.lemma_pos, lexicon)

    y_pred = remove_onehot(y_pred)
    y_pred_lex = remove_onehot(y_pred_lex)
    y_true = remove_onehot(y_test)

    n = len(test_labels)

    unseen_at_training = 0
    seen_only_with_different_label = 0
    normal_error = 0
    correct = 0

    unseen_at_training_lex = 0
    seen_only_with_different_label_lex = 0
    normal_error_lex = 0
    correct_lex = 0
    wrong_by_lexicon = 0

    for i in range(n):
        prediction = y_pred[i]
        prediction_lex = y_pred_lex[i]
        goldlabel = y_true[i]
        lemma_pos = dataset_test.lemma_pos[i]

        predicted_frame = lexicon.get_frame(prediction)
        gold_frame = lexicon.get_frame(goldlabel)

        # Without lexicon
        if prediction == goldlabel:
            correct += 1
        # No data in train but in test
        elif gold_frame not in train_annotations:
            assert gold_frame in test_annotations
            unseen_at_training += 1
        # Different label in train than in test
        elif gold_frame in train_annotations and lemma_pos not in train_annotations[gold_frame]:
            assert gold_frame in test_annotations
            seen_only_with_different_label += 1
        else:
            normal_error += 1

        # With lexicon
        if prediction_lex == goldlabel:
            correct_lex += 1
        # Lexicon contains the lemma pos but not the right label
        elif goldlabel not in lexicon.get_available_frame_ids(lemma_pos):
            wrong_by_lexicon += 1
        # No data in train but in test
        elif gold_frame not in train_annotations:
            assert gold_frame in test_annotations
            unseen_at_training_lex += 1
        # Different label in train than in test
        elif gold_frame in train_annotations and lemma_pos not in train_annotations[gold_frame]:
            assert gold_frame in test_annotations
            seen_only_with_different_label_lex += 1
        else:
            normal_error_lex += 1

    logging.info('Correct: %.2f, Unseen: %.2f, Seen with different label: %.2f, Normal error: %.2f',
                 correct * 100 / n, unseen_at_training * 100 / n,
                 seen_only_with_different_label * 100 / n, normal_error * 100 / n)
    logging.info('Correct: %.2f, Wrong by lexicon: %.2f, Unseen: %.2f, Seen with different label: %.2f, Normal error: %.2f',
                 correct_lex * 100 / n, wrong_by_lexicon * 100 / n,
                 unseen_at_training_lex * 100 / n,
                 seen_only_with_different_label_lex * 100 / n, normal_error_lex * 100 / n)

    return Entry(model_name, correct * 100 / n, unseen_at_training * 100 / n,
                 seen_only_with_different_label * 100 / n, normal_error * 100 / n,
                 correct_lex * 100 / n, unseen_at_training_lex * 100 / n,
                 seen_only_with_different_label_lex * 100 / n, normal_error_lex * 100 / n,
                 wrong_by_lexicon * 100 / n)
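
# Usage sketch (illustrative names; `corpus_traindev` is only needed when the
# embeddings are passed as a dict, because the train/test annotations are then
# collected from the train+dev dataset instead of the annotation files):
#
#     entry = analyse_errors(model_name='mlp-2x512', model_type='mlp',
#                            corpus_test='test', lexicon_name='framenet',
#                            embeddings=word_embeddings)
#     print(entry.correct, entry.normal_error, entry.wrong_by_lexicon)
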