import numpy as np
import sklearn.metrics
import tensorflow as tf

import tools


def twenty_one_cm_rmse(y_true, y_pred, discretise=False):
    """
    Used in the 21cm paper to evaluate performance: RMSE of the residuals,
    normalised by the peak absolute true value.
    """
    if discretise:
        y_p = tools.round_probabilities(y_pred)
    else:
        y_p = y_pred
    return np.sqrt(np.mean((y_true - y_p)**2.)) / np.max(np.abs(y_true))
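
# A hedged usage sketch (not part of the original module): toy arrays run
# through twenty_one_cm_rmse; the helper name _demo_twenty_one_cm_rmse is
# hypothetical, added for illustration only.
def _demo_twenty_one_cm_rmse():
    y_true = np.array([1.0, 2.0, 3.0, 4.0])
    y_pred = np.array([1.1, 1.9, 3.2, 3.8])
    # RMSE of the residuals, normalised by max(|y_true|) = 4.0
    return twenty_one_cm_rmse(y_true, y_pred)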
def regression_r2_score(y_true, y_pred, discretise=False):
    """
    Returns the well-known R^2 regression coefficient, calculated separately
    for each output and then averaged uniformly over the outputs.
    """
    if discretise:
        y_p = tools.round_probabilities(y_pred)
    else:
        y_p = y_pred
    return sklearn.metrics.r2_score(y_true, y_p,
                                    multioutput='uniform_average')
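
# A hedged usage sketch (hypothetical helper name, illustration only): with
# two output columns, R^2 is computed per column and then averaged uniformly,
# matching multioutput='uniform_average' above.
def _demo_regression_r2_score():
    y_true = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
    y_pred = np.array([[1.1, 11.0], [1.9, 19.0], [3.1, 29.0]])
    return regression_r2_score(y_true, y_pred)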
def explained_variance_score(y_true, y_pred, discretise=False):
    """
    Explained variance: 1 - Var(y_true - y_pred) / Var(y_true). Unlike R^2,
    it does not penalise a constant offset in the predictions; see
    https://scikit-learn.org/stable/modules/model_evaluation.html#explained-variance-score
    Not sure whether it makes sense to discretise.
    """
    if discretise:
        y_p = tools.round_probabilities(y_pred)
    else:
        y_p = y_pred
    return sklearn.metrics.explained_variance_score(
        y_true, y_p, multioutput='uniform_average')
def mean_absolute_error(y_true, y_pred, discretise=False):
    """
    Requires input arrays to share the same np.dtype. Returns the average,
    not the sum, of the errors. Discretising (for classification problems)
    makes little sense to me, but may be necessary in some obscure scenarios.
    """
    if discretise:
        y_p = tools.round_probabilities(y_pred)
    else:
        y_p = y_pred
    # use a context manager so the session is closed after evaluation
    with tf.Session() as sess:
        mae_a = sess.run(tf.keras.losses.mean_absolute_error(y_true, y_p))
    return mae_a.mean()
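
# For rectangular arrays, the TF computation above reduces to a plain NumPy
# grand mean (mean over the last axis per sample, then over samples). A
# minimal equivalent sketch; hypothetical name, illustration only:
def _mean_absolute_error_np(y_true, y_pred):
    return np.mean(np.abs(np.asarray(y_true) - np.asarray(y_pred)))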
def twenty_one_cm_rmse_ts(y_true, y_pred, n_z=136, discretise=False):
    """
    Used in the 21cm paper to evaluate performance. Returns the RMSE per
    timeseries (assumes each block of n_z contiguous elements forms one
    timeseries, and that the number of samples m = y_pred.shape[0] is
    divisible by n_z).
    """
    if discretise:
        y_p = tools.round_probabilities(y_pred)
    else:
        y_p = y_pred
    errs = []
    # integer division: y_pred.shape[0] / n_z is a float in Python 3 and
    # would make range() raise a TypeError
    for i in range(y_pred.shape[0] // n_z - 1):
        errs.append(np.sqrt(np.mean((y_true[i * n_z:(i + 1) * n_z] -
                                     y_p[i * n_z:(i + 1) * n_z])**2.)) /
                    np.max(np.abs(y_true[i * n_z:(i + 1) * n_z])))
    # the final timeseries is handled separately via negative indexing
    errs.append(np.sqrt(np.mean((y_true[-n_z:] - y_p[-n_z:])**2.)) /
                np.max(np.abs(y_true[-n_z:])))
    return np.array(errs)
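
# A hedged usage sketch (hypothetical helper name, illustration only): three
# contiguous blocks of n_z elements yield three per-timeseries RMSE values.
def _demo_twenty_one_cm_rmse_ts():
    rng = np.random.RandomState(0)
    n_z = 4
    y_true = rng.normal(size=3 * n_z)
    y_pred = y_true + rng.normal(scale=0.1, size=3 * n_z)
    errs = twenty_one_cm_rmse_ts(y_true, y_pred, n_z=n_z)
    assert errs.shape == (3,)
    return errs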
def decode_onehot_outputs(y_true, y_pred):
    """
    Converts one-hot true ys and predicted ys (probabilities) into decoded
    class-index vectors, for use in sklearn.metrics functions, e.g.
    y_true = [[0,0,1], [0,1,0], [1,0,0]]
    y_pred = [[0.3,0.4,0.3], [0.1,0.5,0.4], [0.2,0.1,0.7]]
    ->
    y_true = [2,1,0]
    y_pred = [1,1,2]
    Note the return order: (y_pred_d, y_true_d).
    """
    y_pred_r = tools.round_probabilities(y_pred)
    y_pred_d = tools.decode_onehot(y_pred_r)
    y_true_d = tools.decode_onehot(y_true)
    return y_pred_d, y_true_d
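
# A hedged usage sketch reproducing the docstring example (hypothetical
# helper name; relies on the project's tools.round_probabilities and
# tools.decode_onehot behaving as described above).
def _demo_decode_onehot_outputs():
    y_true = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]])
    y_pred = np.array([[0.3, 0.4, 0.3], [0.1, 0.5, 0.4], [0.2, 0.1, 0.7]])
    y_pred_d, y_true_d = decode_onehot_outputs(y_true, y_pred)
    # expected, per the docstring: y_true_d == [2, 1, 0], y_pred_d == [1, 1, 2]
    return y_pred_d, y_true_d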
def categorical_crossentropy(y_true, y_pred, discretise=False):
    """
    Requires input arrays to be np.float64. Returns the average, not the
    sum, of the crossentropy. Discretising (for classification problems)
    makes little sense to me, but may be necessary in some obscure scenarios.
    """
    if discretise:
        y_p = tools.round_probabilities(y_pred)
    else:
        y_p = y_pred
    y_true_t = tf.Variable(y_true, dtype=tf.float64)
    y_pred_t = tf.Variable(y_p, dtype=tf.float64)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        cce_a = sess.run(tf.keras.losses.categorical_crossentropy(y_true_t,
                                                                  y_pred_t))
    return cce_a.mean()
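
# A minimal eager-mode sketch of the same average crossentropy, assuming
# TensorFlow 2.x (no Session/Variable machinery needed). An illustrative
# alternative with a hypothetical name, not the module's original API.
def _categorical_crossentropy_tf2(y_true, y_pred):
    cce = tf.keras.losses.categorical_crossentropy(
        tf.constant(y_true, dtype=tf.float64),
        tf.constant(y_pred, dtype=tf.float64))
    return float(tf.reduce_mean(cce))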