# EvaluationHandler.py
from Storage import Storage
from ConfigReader import ConfigReader
from SessionConfigReader import SessionConfigReader
from datetime import datetime
import pandas as pd
class EvaluationHandler:
    """Record, load, compare and rank evaluation scores stored per session.

    Evaluations live in a pandas data frame that is persisted through
    ``Storage`` under the identifier ``evaluations_id``. Every row holds a
    timestamp, a session id, a config id and a score.
    """

    # identifier under which the evaluations frame is stored
    evaluations_id = 'evaluations'

    # column names of the evaluations frame
    timestamp_col = 'timestamp'
    session_id_col = 'session id'
    config_id_col = 'config id'
    score_col = 'score'

    # config keys that compare_evaluations() copies into its result by default
    additional_columns = [
        'corpus_identifier',
        'preprocessor',
        'vectorizer',
        'word2vec_size',
        'word2vec_window',
        'word2vec_min_count',
        'word-vec_to_doc-vec',
        'classifier',
        'keras_nn_layers',
        'keras_nn_loss',
        'keras_nn_optimizer',
        'keras_nn_metrics',
        'keras_nn_epochs',
        'classification_interpreter',
        'similarity_function',
    ]

    @staticmethod
    def add_evaluation(score, session_id=None):
        """Append one evaluation row to a session's stored evaluations.

        Args:
            score: The evaluation score to record.
            session_id: Session whose evaluations are extended. When ``None``
                the id returned by ``ConfigReader.get_session_id()`` is used.

        The config id is always read from ``ConfigReader.get_config_id()``,
        also when an explicit ``session_id`` is passed.
        """
        if session_id is None:
            session_id = ConfigReader.get_session_id()
        config_id = ConfigReader.get_config_id()
        evaluation_frame = Storage.load_pd_frame(EvaluationHandler.evaluations_id, session_id=session_id)
        timestamp_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # The row count is used as the label of the new row.
        # NOTE(review): assumes the stored frame is labelled 0..n-1 - confirm
        # against Storage.
        row = len(evaluation_frame)
        evaluation_frame.at[row, EvaluationHandler.timestamp_col] = timestamp_str
        evaluation_frame.at[row, EvaluationHandler.session_id_col] = session_id
        evaluation_frame.at[row, EvaluationHandler.config_id_col] = config_id
        evaluation_frame.at[row, EvaluationHandler.score_col] = score
        Storage.store_pd_frame(evaluation_frame, EvaluationHandler.evaluations_id, session_id=session_id)

    @staticmethod
    def load_evaluations(session_id=None):
        """Return the stored evaluations frame.

        Args:
            session_id: Session to load; ``None`` is forwarded unchanged to
                ``Storage.load_pd_frame`` (presumably meaning the current
                session - verify against Storage).
        """
        return Storage.load_pd_frame(EvaluationHandler.evaluations_id, session_id=session_id)

    @staticmethod
    def clear_evaluations(session_id=None):
        """Delete the stored evaluations frame.

        Args:
            session_id: Session to clear; ``None`` is forwarded unchanged to
                ``Storage.delete_pd_frame``.
        """
        Storage.delete_pd_frame(EvaluationHandler.evaluations_id, session_id=session_id)

    @staticmethod
    def compare_evaluations(session_ids=None, remove_cols=None, add_cols=None):
        """Build a score-sorted overview of evaluations with config details.

        Args:
            session_ids: Iterable of session ids whose evaluations are
                combined. When ``None`` only ``load_evaluations()`` with its
                default session is used.
            remove_cols: Optional column names to drop from the result.
            add_cols: Optional extra config keys to add as columns. They are
                added after ``remove_cols`` was applied, so a key listed in
                both ends up present.

        Returns:
            A data frame sorted by score (descending) that additionally holds
            one column per entry of ``additional_columns`` (unless removed)
            and per entry of ``add_cols``. Cells whose key is missing from the
            row's config contain ``''``.
        """
        if session_ids is None:
            all_evals = EvaluationHandler.load_evaluations()
        else:
            frames = [EvaluationHandler.load_evaluations(session_id=session_id) for session_id in session_ids]
            # pd.concat() rejects an empty list, so fall back to an empty frame.
            if frames:
                all_evals = pd.concat(frames, sort=False, ignore_index=True)
            else:
                all_evals = pd.DataFrame()

        # An empty frame may not even have a score column; nothing to sort then.
        if not all_evals.empty:
            all_evals = all_evals.sort_values(by=[EvaluationHandler.score_col], ascending=False)

        all_evals = EvaluationHandler._add_config_columns(all_evals, EvaluationHandler.additional_columns)

        if remove_cols is not None:
            for key in remove_cols:
                if key in all_evals:
                    all_evals = all_evals.drop(columns=[key])

        if add_cols is not None:
            all_evals = EvaluationHandler._add_config_columns(all_evals, add_cols)

        return all_evals

    @staticmethod
    def _add_config_columns(frame, keys):
        """Fill one column per key with the value found in each row's config.

        Rows are processed by position, so the result does not depend on the
        frame's index labels. The frame is modified in place and returned.
        """
        # Drop duplicate keys while keeping their order.
        keys = list(dict.fromkeys(keys))
        if frame.empty or not keys:
            return frame

        columns = {key: [] for key in keys}
        session_ids = frame[EvaluationHandler.session_id_col]
        config_ids = frame[EvaluationHandler.config_id_col]
        for session_id, conf_id in zip(session_ids, config_ids):
            conf = SessionConfigReader.get_config(session_id=session_id, config_id=conf_id)
            for key in keys:
                columns[key].append(conf[key][0] if key in conf else '')

        for key, values in columns.items():
            # object dtype keeps the values exactly as read from the config.
            frame[key] = pd.Series(values, index=frame.index, dtype=object)
        return frame

    @staticmethod
    def sort(session_id=None):
        """Sort the stored evaluations by score (descending) and store them.

        Args:
            session_id: Session whose evaluations are sorted; ``None`` is
                forwarded unchanged to ``Storage``.
        """
        evals = EvaluationHandler.load_evaluations(session_id=session_id)
        if not evals.empty:
            evals = evals.sort_values(by=[EvaluationHandler.score_col], ascending=False)
        Storage.store_pd_frame(evals, EvaluationHandler.evaluations_id, session_id=session_id)

    @staticmethod
    def _best_ids(evals):
        """Return ``(session_id, config_id)`` of the highest score, or ``None``.

        ``None`` is returned when the frame is empty.
        """
        if evals.empty:
            return None
        ranked = evals.sort_values(by=[EvaluationHandler.score_col], ascending=False)
        # Positional access: after sorting, label 0 is not necessarily on top.
        top = ranked.iloc[0]
        return top[EvaluationHandler.session_id_col], top[EvaluationHandler.config_id_col]

    @staticmethod
    def set_best_performing(eval_session_id=None):
        """Mark the config with the highest evaluation score as best performing.

        Args:
            eval_session_id: Session whose evaluations are inspected; ``None``
                is forwarded unchanged to ``load_evaluations``.

        When no evaluations exist,
        ``SessionConfigReader.set_best_performing_by_ids()`` is called without
        arguments.
        """
        evals = EvaluationHandler.load_evaluations(session_id=eval_session_id)
        best = EvaluationHandler._best_ids(evals)
        if best is None:
            SessionConfigReader.set_best_performing_by_ids()
        else:
            session_id, config_id = best
            SessionConfigReader.set_best_performing_by_ids(session_id=session_id, config_id=config_id)