def label_rank_loss(arr, arr1, arr2, arr3):
    a3 = label_ranking_loss(arr, arr1)
    b3 = label_ranking_loss(arr, arr2)
    c3 = label_ranking_loss(arr, arr3)
    print("Ranking Loss Scores for the three classifiers are")
    print("Using Binary Relevance: " + str(a3))
    print("Using Classifier Chain: " + str(b3))
    print("Using LabelPowerSet: " + str(c3))
    print("\n")
def use_sklearn_ml_knn():
    """
    :return:
    """
    base_path = os.getcwd()
    # train_x = np.load(os.path.join(base_path, 'dataset/train_x.npy'), allow_pickle=True)
    # train_y = np.load(os.path.join(base_path, 'dataset/train_y.npy'), allow_pickle=True)
    train_x = np.load(os.path.join(base_path, 'my_dataset/train_x.npy'), allow_pickle=True)
    train_y = np.load(os.path.join(base_path, 'my_dataset/train_y.npy'), allow_pickle=True)

    new_train_y = []
    for tup in train_y:
        tmp = []
        for label in tup:
            if label == 0:
                tmp.append(0)
            else:
                tmp.append(1)
        new_train_y.append(tmp)

    # test_x = np.load('dataset/test_x.npy', allow_pickle=True)
    # test_y = np.load('dataset/test_y.npy', allow_pickle=True)
    test_x = np.load('my_dataset/test_x.npy', allow_pickle=True)
    test_y = np.load('my_dataset/test_y.npy', allow_pickle=True)

    new_test_y = []
    for tup in test_y:
        tmp = []
        for label in tup:
            if label == 0:
                tmp.append(0)
            else:
                tmp.append(1)
        new_test_y.append(tmp)
    new_test_y = np.array(new_test_y)

    classifier = MLkNN2(train_x, np.array(new_train_y), k=10)
    # classifier.fit(train_x, np.array(new_train_y))
    classifier.fit()
    predictions = classifier.predict(test_x)
    predictions = convert_prediction(predictions)

    # hamming_loss = HammingLoss(new_test_y, predictions)
    h_loss = hamming_loss(new_test_y, predictions)
    z = zero_one_loss(new_test_y, predictions)
    c = coverage_error(new_test_y, predictions)
    r = label_ranking_loss(new_test_y, predictions)
    a = average_precision_score(new_test_y, predictions)
    print('hamming_loss = ', h_loss)
    print('0-1_loss = ', z)
    print('cover_loss = ', c)
    print('rank_loss = ', r)
    print('average_loss = ', a)
def evaluation(y_pred, y_prob, y_true):
    coverage = coverage_error(y_true, y_prob)
    hamming = hamming_loss(y_true, y_pred)
    ranking_loss = label_ranking_loss(y_true, y_prob)
    f1_macro = metrics.f1_score(y_true, y_pred, average='macro')
    f1_micro = metrics.f1_score(y_true, y_pred, average='micro')
    acc = 0
    for i in range(y_true.shape[0]):
        acc += jaccard_similarity_score(y_true.iloc[i, :], y_pred.iloc[i, :])  # per-sample Jaccard index
    acc = acc / y_true.shape[0]
    zero_one = zero_one_loss(y_true, y_pred)  # 0-1 error
    performance = {
        "coverage_error": coverage,
        "ranking_loss": ranking_loss,
        "hamming_loss": hamming,
        "f1_macro": f1_macro,
        "f1_micro": f1_micro,
        "Jaccard_Index": acc,
        "zero_one_error": zero_one
    }
    return performance
def cross_validation(self, features):
    '''
    Standalone validation of an untrained classifier.
    Splits the features into a training set and a set for validation.
    Warning: overwrites any existing trained model.
    '''
    values, classes, categories = self._features_to_values(features)
    values = np.nan_to_num(values)
    n_classes = len(categories)
    (training_values, test_values,
     training_classes, test_classes) = train_test_split(values, classes,
                                                        test_size=self.validation_split,
                                                        random_state=self.seed)
    self._train(np.array(training_values), training_classes, n_classes)
    predictions = self._predict(np.array(test_values))
    predicted_classes = np_utils.probas_to_classes(predictions)
    binary_labels = np_utils.to_categorical(test_classes)

    # compute the metrics
    accuracy = accuracy_score(test_classes, predicted_classes)
    precision_score = average_precision_score(binary_labels, predictions)
    error = coverage_error(binary_labels, predictions)
    loss = label_ranking_loss(binary_labels, predictions)
    label_precision = label_ranking_average_precision_score(binary_labels, predictions)
    real_cat = categories[test_classes]
    predicted_cat = categories[predicted_classes]
    return (real_cat, predicted_cat, accuracy, precision_score,
            error, loss, label_precision)
def metric(pred_prob, label, inclusion_index_set, threshold=0.5):
    # label, pred_prob structure: [n_classes, n_samples]
    included_pred_prob = list()
    included_label = list()
    for index in inclusion_index_set:
        included_pred_prob.append(pred_prob[index])
        included_label.append(label[index])
    prob = np.array(included_pred_prob).transpose()
    pred = np.array(included_pred_prob).transpose() > threshold
    true = np.array(included_label).transpose()

    micro_auc = roc_auc_score(true, prob, average='micro')
    macro_auc = roc_auc_score(true, prob, average='macro')
    micro_f1 = f1_score(true, pred, average='micro')
    macro_f1 = f1_score(true, pred, average='macro')
    micro_avg_precision = average_precision_score(true, prob, average='micro')
    macro_avg_precision = average_precision_score(true, prob, average='macro')
    coverage = coverage_error(true, prob)
    ranking_loss = label_ranking_loss(true, prob)
    hamming = hamming_loss(true, pred)

    fuse = np.concatenate([prob[:, :, np.newaxis], true[:, :, np.newaxis]],
                          axis=2).transpose([1, 0, 2])
    top_1_num = top_k_num(fuse, 1)
    top_3_num = top_k_num(fuse, 3)
    top_5_num = top_k_num(fuse, 5)
    top_10_num = top_k_num(fuse, 10)
    top_20_num = top_k_num(fuse, 20)
    top_30_num = top_k_num(fuse, 30)
    top_40_num = top_k_num(fuse, 40)
    top_50_num = top_k_num(fuse, 50)
    return (macro_auc, micro_auc, micro_f1, macro_f1, micro_avg_precision,
            macro_avg_precision, coverage, ranking_loss, hamming,
            top_1_num, top_3_num, top_5_num, top_10_num, top_20_num,
            top_30_num, top_40_num, top_50_num)
def powerset(X_train, X_test, y_train, y_test, classifier):
    print("Label Powerset")
    model = chooseClassifier(classifier, X_train, y_train)
    y_pred = model.predict(X_test)

    hamming = hamming_loss(y_test, y_pred)
    subset_accuracy = accuracy_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred, average='micro')
    precision = precision_score(y_test, y_pred, average='micro')
    f1 = f1_score(y_test, y_pred, average='micro')
    coverage = coverage_error(y_test, y_pred.toarray())
    aps = label_ranking_average_precision_score(y_test, y_pred.toarray())
    rankingloss = label_ranking_loss(y_test, y_pred.toarray())

    print("Hamming: " + str(hamming))
    print("Subset Accuracy: " + str(subset_accuracy))
    print("Recall: " + str(recall))
    print("Precision: " + str(precision))
    print("F1: " + str(f1))
    print("Coverage error: " + str(coverage))
    print("Average Precision Score: " + str(aps))
    print("Ranking Loss: " + str(rankingloss))
    print("\n")
    return hamming, subset_accuracy, recall, precision, f1, coverage, aps, rankingloss
def no_motion_baseline_metrics(original_dataset_cartesian):
    traces_train, traces_test = get_traces_for_train_and_test()
    accuracy_results = []
    f1_score_results = []
    ranking_results = []
    for trace_num, trace in enumerate(traces_test):
        user = trace['user']
        video = trace['video']
        repl_tiles_map = read_replica_tile_info(video, user)
        for t in range(M_WINDOW, len(original_dataset_cartesian[user][video]) - H_WINDOW):
            print('computing no_motion metrics for trace', trace_num, '/', len(traces_test),
                  'time-stamp:', t)
            past_positions = original_dataset_cartesian[user][video][t - M_WINDOW:t + 1]
            # pred_tile_map = from_position_to_tile_probability_cartesian(past_positions[-1])
            pred_tile_map = repl_tiles_map[t]
            future_tile_maps = repl_tiles_map[t + 1:t + H_WINDOW + 1]
            for x_i, tile_map in enumerate(future_tile_maps):
                accuracy_results.append(
                    accuracy_score(np.ndarray.flatten(tile_map),
                                   np.ndarray.flatten(pred_tile_map)))
                f1_score_results.append(
                    f1_score(np.ndarray.flatten(tile_map),
                             np.ndarray.flatten(pred_tile_map)))
                ranking_results.append(label_ranking_loss(tile_map, pred_tile_map))
    return np.mean(accuracy_results) * 100, np.mean(f1_score_results), np.mean(ranking_results)
def update_from_numpy(self, preds, labels):
    for pred, label, cls in zip(zip(*preds), zip(*labels), self.confusion):
        true_pos = np.sum([p and l for p, l in zip(pred, label)])
        true_neg = np.sum([not p and not l for p, l in zip(pred, label)])
        false_pos = np.sum([p and not l for p, l in zip(pred, label)])
        false_neg = np.sum([not p and l for p, l in zip(pred, label)])
        self.num_true_positives += true_pos
        self.num_true_negatives += true_neg  # fixed: was added to num_true_positives a second time
        self.num_false_positives += false_pos
        self.num_false_negatives += false_neg
        cls["true_pos"] += true_pos
        cls["true_neg"] += true_neg
        cls["false_pos"] += false_pos
        cls["false_neg"] += false_neg
        cls["support"] += true_pos + false_neg

    n = len(preds)
    self.n += n
    self.ranking_loss += label_ranking_loss(labels, preds) * n
    self.coverage += coverage_error(labels, preds) * n
    self.average_precision += label_ranking_average_precision_score(labels, preds) * n

    for pred, label in zip(preds, labels):
        # one-error: count samples whose top-scored label is not among the true labels
        top_prediction = np.argsort(pred)[-1]
        label = np.argwhere(label)
        if top_prediction not in label:
            self.one_error += 1
def on_epoch_end(self, epoch, logs={}):
    result = self.model.predict(self.x_test)
    roc_auc = metrics.roc_auc_score(self.y_test.ravel(), result.ravel())
    print('\r Micro val_roc_auc: %s' % (str(round(roc_auc, 4))), end=100 * ' ' + '\n')

    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(14):
        fpr[i], tpr[i], _ = roc_curve(self.y_test[:, i], result[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
        print("Class " + str(i) + " auc = " + str(roc_auc[i]))
    macro = sum(roc_auc.values()) / 14
    print('\r Macro val_roc_auc: %s' % (str(round(macro, 4))), end=100 * ' ' + '\n')

    value = coverage_error(self.y_test, result)
    print('\r coverage_error: %s' % (str(round(value, 4))), end=100 * ' ' + '\n')
    value = label_ranking_loss(self.y_test, result)
    print('\r label_ranking_loss: %s' % (str(round(value, 4))), end=100 * ' ' + '\n')
    roc_auc = label_ranking_average_precision_score(self.y_test, result)
    print('\r label_ranking_average_precision_score: %s' % (str(round(roc_auc, 4))),
          end=100 * ' ' + '\n')
    return
def binary(X_train, X_test, y_train, y_test):
    print("Binary Relevance")
    model = BinaryRelevance(classifier=SVC(), require_dense=[True, True]).fit(X_train, y_train)
    y_pred = model.predict(X_test)

    hamming = hamming_loss(y_test, y_pred)
    subset_accuracy = accuracy_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred, average='micro')
    precision = precision_score(y_test, y_pred, average='micro')
    f1 = f1_score(y_test, y_pred, average='micro')
    coverage = coverage_error(y_test, y_pred.toarray())
    aps = label_ranking_average_precision_score(y_test, y_pred.toarray())
    rankingloss = label_ranking_loss(y_test, y_pred.toarray())

    print("Hamming: " + str(hamming))
    print("Subset Accuracy: " + str(subset_accuracy))
    print("Recall: " + str(recall))
    print("Precision: " + str(precision))
    print("F1: " + str(f1))
    print("Coverage error: " + str(coverage))
    print("Average Precision Score: " + str(aps))
    print("Ranking Loss: " + str(rankingloss))
    print("\n")
    return hamming, subset_accuracy, recall, precision, f1, coverage, aps, rankingloss
def on_epoch_end(self, epoch, logs={}):
    result = self.model.predict_generator(self.val_gen, steps=self.val_gen.n / BATCH, verbose=1)
    print(self.y[0])
    print(result[0])
    roc_auc = metrics.roc_auc_score(self.y.ravel(), result.ravel())
    print('\r Micro val_roc_auc: %s' % (str(round(roc_auc, 4))), end=100 * ' ' + '\n')

    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(14):
        fpr[i], tpr[i], _ = roc_curve(self.y[:, i], result[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
        print("Class " + str(i) + " auc = " + str(roc_auc[i]))

    value = coverage_error(self.y, result)
    print('\r coverage_error: %s' % (str(round(value, 4))), end=100 * ' ' + '\n')
    value = label_ranking_loss(self.y, result)
    print('\r label_ranking_loss: %s' % (str(round(value, 4))), end=100 * ' ' + '\n')
    roc_auc = label_ranking_average_precision_score(self.y, result)
    print('\r label_ranking_average_precision_score: %s' % (str(round(roc_auc, 4))),
          end=100 * ' ' + '\n')
    return
def evaluate(predictions, labels, threshold=0.5):
    '''
    True Positive  : Label 1, Prediction 1
    False Positive : Label 0, Prediction 1
    False Negative : Label 1, Prediction 0
    True Negative  : Label 0, Prediction 0

    Precision : TP / (TP + FP)
    Recall    : TP / (TP + FN)
    F Score   : 2.P.R / (P + R)
    Ranking Loss : The average fraction of label pairs that are incorrectly ordered
                   by the prediction scores.
    Hamming Loss : The fraction of labels that are incorrectly predicted
                   (Hamming distance between predictions and labels).
    '''
    assert predictions.shape == labels.shape, "Shapes: %s, %s" % (
        predictions.shape,
        labels.shape,
    )
    metrics = dict()
    # print('pre', predictions)
    # print('label', labels)
    metrics['coverage'] = coverage_error(labels, predictions)
    metrics['average_precision'] = label_ranking_average_precision_score(labels, predictions)
    metrics['ranking_loss'] = label_ranking_loss(labels, predictions)

    # binarize the scores before computing the bipartition-based metrics
    for i in range(predictions.shape[0]):
        predictions[i, :][predictions[i, :] >= threshold] = 1
        predictions[i, :][predictions[i, :] < threshold] = 0

    metrics['bae'] = 0
    metrics['patk'] = patk(predictions, labels)
    metrics['hamming_loss'] = hamming_loss(y_pred=predictions, y_true=labels)
    metrics['micro_precision'], metrics['micro_recall'], metrics['micro_f1'], \
        metrics['macro_precision'], metrics['macro_recall'], metrics['macro_f1'] = \
        bipartition_scores(labels, predictions)
    return metrics
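# A minimal, self-contained sketch (not taken from any of the snippets in this file) of the
# sklearn ranking metrics that evaluate() relies on; the toy arrays below are made up.
import numpy as np
from sklearn.metrics import (coverage_error, hamming_loss,
                             label_ranking_average_precision_score, label_ranking_loss)

y_true = np.array([[1, 0, 1],
                   [0, 1, 0]])          # binary indicator matrix, one row per sample
y_score = np.array([[0.9, 0.2, 0.6],
                    [0.1, 0.8, 0.3]])   # continuous scores, higher means more likely

print(coverage_error(y_true, y_score))                         # 1.5: avg. top-k labels needed to cover all true ones
print(label_ranking_loss(y_true, y_score))                     # 0.0: every true label outranks every false one
print(label_ranking_average_precision_score(y_true, y_score))  # 1.0 for a perfect ranking
print(hamming_loss(y_true, (y_score >= 0.5).astype(int)))      # 0.0 after thresholding at 0.5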
def evaluateFold(self, clf, mask):
    # get probability of the held-out test ligands being positive
    probs = clf.predict_proba(self.x[~mask])[:, 1]
    ranking_loss = label_ranking_loss(
        self.y[~mask][:, self.targetIndex].reshape(1, -1),
        probs.reshape(1, -1))
    return ranking_loss
def test_ranking_loss_ties_handling():
    # Tie handling
    assert_almost_equal(label_ranking_loss([[1, 0]], [[0.5, 0.5]]), 1)
    assert_almost_equal(label_ranking_loss([[0, 1]], [[0.5, 0.5]]), 1)
    assert_almost_equal(label_ranking_loss([[0, 0, 1]], [[0.25, 0.5, 0.5]]), 1 / 2)
    assert_almost_equal(label_ranking_loss([[0, 1, 0]], [[0.25, 0.5, 0.5]]), 1 / 2)
    assert_almost_equal(label_ranking_loss([[0, 1, 1]], [[0.25, 0.5, 0.5]]), 0)
    assert_almost_equal(label_ranking_loss([[1, 0, 0]], [[0.25, 0.5, 0.5]]), 1)
    assert_almost_equal(label_ranking_loss([[1, 0, 1]], [[0.25, 0.5, 0.5]]), 1)
    assert_almost_equal(label_ranking_loss([[1, 1, 0]], [[0.25, 0.5, 0.5]]), 1)
def evaluate_ouput(y_test, output):
    metrics = dict()
    metrics['coverage'] = coverage_error(y_test, output)
    metrics['average_precision'] = label_ranking_average_precision_score(y_test, output)
    metrics['ranking_loss'] = label_ranking_loss(y_test, output)
    metrics['one_error'] = OneError(output, y_test)
    return metrics
def _costFunction(self, y_ni, t_ni):
    res = 0.0
    if self._loss == 'hamming':
        res = hamming_loss(y_ni, t_ni)
    elif self._loss == 'rank':
        res = label_ranking_loss(y_ni, t_ni)
    elif self._loss == 'f1':
        res = 1 - f1_score(y_ni, t_ni, average='binary')
    return res
def compute_evaluation(true_matrix, predict_matrix):
    h = hamming_loss(true_matrix, predict_matrix)
    z = zero_one_loss(true_matrix, predict_matrix)
    # sklearn's coverage_error counts ranks starting at 1, so subtract 1 to get
    # the "extra labels needed" form of the coverage measure
    c = coverage_error(true_matrix, predict_matrix) - 1
    r = label_ranking_loss(true_matrix, predict_matrix)
    a = average_precision_score(true_matrix, predict_matrix)
    result = [h, z, c, r, a]
    return result
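# Tiny illustration (made-up matrices) of why compute_evaluation subtracts 1:
# sklearn's coverage_error is the average number of top-ranked labels needed to
# cover all true labels, which is at least 1 even for a perfect ranking.
import numpy as np
from sklearn.metrics import coverage_error

true_matrix = np.array([[1, 0, 0]])
predict_matrix = np.array([[0.9, 0.4, 0.1]])
print(coverage_error(true_matrix, predict_matrix))      # 1.0 for a perfect ranking
print(coverage_error(true_matrix, predict_matrix) - 1)  # 0.0 "extra labels needed"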
def evaluate(self, evaluation_metric='auc'):
    """
    Prints evaluation score
    :param evaluation_metric: string name of the evaluation metric specified in EVALUATION_METRIC_VALUES
    """
    if evaluation_metric not in EVALUATION_METRIC_VALUES:
        print('Error: wrong evaluation metric')
        return

    predictions = self.predictions
    mask = self.mask
    true_values = self.true_values
    ratings_true = []
    ratings_predicted = []
    for i in range(predictions.shape[0]):
        for j in range(predictions.shape[1]):
            if mask[i][j]:
                ratings_true.append(true_values[i][j])
                ratings_predicted.append(predictions[i][j])
    ratings_true = np.asarray(ratings_true)
    ratings_predicted = np.asarray(ratings_predicted)

    # Rmse
    if evaluation_metric == 'rmse':
        score = rmse(ratings_true, ratings_predicted)
        print('\nrmse: ' + str(score))

    # Auc
    if evaluation_metric == 'auc':
        score = roc_auc_score(ratings_true, ratings_predicted)
        print('\nauc: ' + str(score))

    # Label ranking loss
    if evaluation_metric == 'lrl':
        max_rating = max(ratings_predicted)
        min_rating = min(ratings_predicted)
        normalized_ratings = []
        for r in ratings_predicted:
            new_rating = (r - min_rating) / (max_rating - min_rating)
            normalized_ratings.append(new_rating)
        ratings_predicted = np.zeros((len(ratings_predicted), 3))
        for index, r in enumerate(normalized_ratings):
            ratings_predicted[index, 0] = 1 - r
            ratings_predicted[index, 1] = 1 - ratings_predicted[index, 0]
            ratings_predicted[index, 2] = 0
        # ratings_predicted = label_binarize(ratings_predicted, classes=[1, 2])
        ratings_true = label_binarize(ratings_true, classes=[1, 2, 3])
        score = label_ranking_loss(ratings_true, ratings_predicted)
        print('\nlabel ranking loss: ' + str(score))
def ranking_loss(self): """ Computes the ranking loss, which averages the number of incorrectly-ordered labels (i.e. true labels have a lower score than false labels, weighted by the the inverse number of false and true labels) based on raw precision scores. """ self.ranking_loss = metrics.label_ranking_loss( self.ground_truth, self.predictions_raw) return self.ranking_loss
def evalulate(y_true, y_prob):
    # the following deals with the {0,1} vs {-1,1} label ambiguity,
    # but may slow down the process (?? not sure)
    # if -1 in y_true:
    #     y_true = (y_true + 1) / 2.0
    y_true = (y_true + 1) / 2.0
    auc = roc_auc_score(y_true, y_prob)
    ap = label_ranking_average_precision_score([y_true], [y_prob])
    rl = label_ranking_loss([y_true], [y_prob])
    return auc, ap, rl
def ranking_loss(self): """ Computes the ranking loss, which averages the number of incorrectly-ordered labels (i.e. true labels have a lower score than false labels, weighted by the the inverse number of false and true labels) based on raw precision scores. """ self.ranking_loss = metrics.label_ranking_loss(self.ground_truth, self.predictions_raw) return self.ranking_loss
def get_score(y_true, y_pred, labels=None):
    scores = {}
    scores["lrap"] = label_ranking_average_precision_score(y_true, y_pred)
    scores["lrloss"] = label_ranking_loss(y_true, y_pred)
    scores["ndcg_score"] = ndcg_score(y_true, y_pred)
    scores["coverage_error"] = coverage_error(y_true, y_pred)
    try:
        scores["hamming_loss"] = hamming_loss(y_true, y_pred)
    except:
        scores["hamming_loss"] = None
    try:
        scores["subset_accuracy"] = accuracy_score(y_true, y_pred)
    except:
        scores["subset_accuracy"] = None

    for avg in [None, "micro", "macro", "weighted", "samples"]:
        if avg:
            avg_suffix = f"_{avg}"
            try:
                (
                    scores[f"precision{avg_suffix}"],
                    scores[f"recall{avg_suffix}"],
                    scores[f"f1{avg_suffix}"],
                    _,
                ) = precision_recall_fscore_support(y_true, y_pred, average=avg)
            except:
                (
                    scores[f"precision{avg_suffix}"],
                    scores[f"recall{avg_suffix}"],
                    scores[f"f1{avg_suffix}"],
                ) = (None, None, None)
            try:
                scores[f"roc_auc{avg_suffix}"] = roc_auc_score(y_true, y_pred, average=avg)
            except:
                scores[f"roc_auc{avg_suffix}"] = None
        else:
            try:
                p, r, f, _ = precision_recall_fscore_support(y_true, y_pred)
                scores["precision"], scores["recall"], scores["f1"] = (
                    dict(zip(labels, list(sc))) for sc in (p, r, f)
                )
            except:
                scores["precision"], scores["recall"], scores["f1"] = (None, None, None)
            try:
                scores["roc_auc"] = roc_auc_score(y_true, y_pred)
            except:
                scores["roc_auc"] = None
    return scores
def print_predict(ground_truth, prediction, hyper_params):
    rounded = 4
    AUC_macro = round(roc_auc_score(ground_truth, prediction, average='macro'), rounded)
    AUC_micro = round(roc_auc_score(ground_truth, prediction, average='micro'), rounded)
    Coverage_error = round(coverage_error(ground_truth, prediction) / ground_truth.shape[1], rounded)
    rankloss = round(label_ranking_loss(ground_truth, prediction), rounded)
    One_error = round(one_error(ground_truth, prediction), rounded)
    Precision_at_ks = precision_at_ks(ground_truth, prediction)
    Log_loss = round(log_loss(ground_truth, prediction), rounded)
    Average_precision_score = round(average_precision_score(ground_truth, prediction), rounded)

    # threshold the scores before computing the bipartition-based metrics
    prediction = np.round(prediction)
    F1_Micro = round(f1_score(ground_truth, prediction, average='micro'), rounded)
    Hamming_loss = round(hamming_loss(ground_truth, prediction), rounded)
    Accuracy = round(accuracy_score(ground_truth, prediction), rounded)
    Recall_score_macro = round(recall_score(ground_truth, prediction, average='macro'), rounded)
    Recall_score_micro = round(recall_score(ground_truth, prediction, average='micro'), rounded)
    Precision_score_macro = round(precision_score(ground_truth, prediction, average='macro'), rounded)
    Precision_score_micro = round(precision_score(ground_truth, prediction, average='micro'), rounded)
    Jaccard_score_macro = round(jaccard_score(ground_truth, prediction, average='macro'), rounded)
    Jaccard_score_micro = round(jaccard_score(ground_truth, prediction, average='micro'), rounded)

    print('Recall_score_macro: ', Recall_score_macro)
    print('Recall_score_micro: ', Recall_score_micro)
    print('Precision_score_macro: ', Precision_score_macro)
    print('Precision_score_micro: ', Precision_score_micro)
    print('Jaccard_score_macro: ', Jaccard_score_macro)
    print('Jaccard_score_micro: ', Jaccard_score_micro)
    print("Accuracy = ", Accuracy)
    print('precision_at_ks: ', Precision_at_ks)
    print('Hamming_loss: ', Hamming_loss)
    print('Log_loss: ', Log_loss)
    print('Average_precision_score: ', Average_precision_score)
    print('F1_Micro ', F1_Micro)
    print('One_error: ', One_error)
    print('Ranking loss: ', rankloss)
    print('coverage: ', Coverage_error)
    print('AUC-micro: ', AUC_micro)
    print('AUC-macro: ', AUC_macro)
    print('\n')
def evaluate(_y_true, _y_pred, _y_scores):
    y_true = np.array(_y_true)
    y_pred = np.array(_y_pred)
    y_scores = np.array(_y_scores)
    pre = precision_score(y_true, y_pred, average='micro')
    rec = recall_score(y_true, y_pred, average='micro')
    fs = f1_score(y_true, y_pred, average='micro')
    hl = hamming_loss(y_true, y_pred)
    rl = label_ranking_loss(y_true, y_scores)
    return pre, rec, fs, hl, rl
def get_classification_report_2(self, train_y, predicted_score, verbose=1):
    cov_err = metrics.coverage_error(train_y, predicted_score)
    label_rank_avg_prec = metrics.label_ranking_average_precision_score(train_y, predicted_score)
    rank_loss = metrics.label_ranking_loss(train_y, predicted_score)
    log_loss = metrics.log_loss(train_y, predicted_score)
    if verbose:
        print('CoverageError', cov_err)
        print('LabelRankingAvgPrec', label_rank_avg_prec)
        print('LabelRankingLoss', rank_loss)
        print('log_loss', log_loss)
    return [cov_err, label_rank_avg_prec, rank_loss, log_loss]
def treino_binarizacao(X, Y):
    labels = [
        'Latitude', 'Longitude', 'DiaSemChuva', 'Precipitacao', 'RiscoFogo',
        'TempBulboSecoEst1', 'TempBulboUmidoEst1', 'UmidadeRelativaEst1',
        'DirecaoVentoEst1', 'VelocidadeVentoNebulosidadeEst1', 'DistanciaParaEst1',
        'TempBulboSecoEst2', 'TempBulboUmidoEst2', 'UmidadeRelativaEst2',
        'DirecaoVentoEst2', 'VelocidadeVentoNebulosidadeEst2', 'DistanciaParaEst2'
    ]
    mlb = MultiLabelBinarizer()
    Ybin = mlb.fit_transform(Y)
    mlp = neuralnetwork.MLPClassifier(hidden_layer_sizes=(10, 4),
                                      activation='tanh',
                                      solver='lbfgs',
                                      learning_rate='invscaling',
                                      random_state=2818,
                                      max_iter=400,
                                      early_stopping=True)
    # split on the binarized labels so the ranking metrics below receive indicator matrices
    # (the original code split on the raw Y and never used Ybin)
    x_train, x_test, y_train, y_test = model.train_test_split(X, Ybin, train_size=0.33)
    mlp.fit(x_train, y_train)
    y_pred = mlp.predict(x_test)
    print("Coverage error: " + str(metrics.coverage_error(y_test, y_pred)))
    print("Label ranking average precision: "
          + str(metrics.label_ranking_average_precision_score(y_test, y_pred)))
    print("Ranking loss: " + str(metrics.label_ranking_loss(y_test, y_pred)))
    matriz = matriz_confusao(y_test, y_pred)
    results = {
        "Coverage error": metrics.coverage_error(y_test, y_pred),
        "Label ranking average precision": metrics.label_ranking_average_precision_score(y_test, y_pred),
        "Ranking loss": metrics.label_ranking_loss(y_test, y_pred),
        "Confusion matrices": matriz
    }
    res_df = pd.DataFrame(results)
    res_df.to_csv("C:\\Users\Livnick\Documents\dadosFocos\ResultadosMAcomMatriz2.csv")
def CVPR18_metrics(original_dataset_cartesian):
    M_WINDOW_TRAINED_MODEL = 5
    H_WINDOW_TRAINED_MODEL = 25
    traces_train, traces_test = get_traces_for_train_and_test()
    model = create_CVPR18_model(M_WINDOW_TRAINED_MODEL, H_WINDOW_TRAINED_MODEL,
                                NUM_TILES_HEIGHT_SAL, NUM_TILES_WIDTH_SAL)
    model.load_weights(
        os.path.join(ROOT_FOLDER, 'CVPR18',
                     'Models_EncDec_3DCoords_ContSal_init_5_in_5_out_25_end_25',
                     'weights_100.hdf5'))
    accuracy_results = []
    f1_score_results = []
    ranking_results = []
    for trace_num, trace in enumerate(traces_test):
        print('computing CVPR18 metrics for trace', trace_num, '/', len(traces_test))
        user = trace['user']
        video = trace['video']
        repl_tiles_map = read_replica_tile_info(video, user)
        saliency_in_video = load_saliency(SALIENCY_FOLDER, video)
        for t in range(M_WINDOW, len(original_dataset_cartesian[user][video]) - H_WINDOW):
            past_positions = original_dataset_cartesian[user][video][t - M_WINDOW:t + 1]
            # ToDo: The value "6" is hardcoded, it comes from
            # "int(MODEL_SAMPLING_RATE / ORIGINAL_SAMPLING_RATE)"
            curr_id_in_model_steps = int(t / 6)
            sal_decoder = np.zeros((1, H_WINDOW_TRAINED_MODEL, NUM_TILES_HEIGHT_SAL,
                                    NUM_TILES_WIDTH_SAL, 1))
            picked_sal_decoder = saliency_in_video[curr_id_in_model_steps + 1:
                                                   curr_id_in_model_steps + H_WINDOW_TRAINED_MODEL + 1]
            sal_decoder[0, :len(picked_sal_decoder), :, :, 0] = picked_sal_decoder
            pred_tile_map = get_CVPR18_prediction(model, past_positions,
                                                  M_WINDOW_TRAINED_MODEL, sal_decoder)
            # future_positions = original_dataset_cartesian[user][video][t+1:t+H_WINDOW+1]
            future_tile_maps = repl_tiles_map[t + 1:t + H_WINDOW + 1]
            for x_i, tile_map in enumerate(future_tile_maps):
                accuracy_results.append(
                    accuracy_score(np.ndarray.flatten(tile_map),
                                   np.ndarray.flatten(pred_tile_map[x_i])))
                f1_score_results.append(
                    f1_score(np.ndarray.flatten(tile_map),
                             np.ndarray.flatten(pred_tile_map[x_i])))
                ranking_results.append(label_ranking_loss(tile_map, pred_tile_map[x_i]))
    print('CVPR18:\tAccuracy', np.mean(accuracy_results) * 100,
          'F-Score', np.mean(f1_score_results),
          'Rank. Loss', np.mean(ranking_results))
def Ranking_loss(labels, probs, mode=1):
    '''
    Measures how often a sample's irrelevant labels are ranked above its relevant labels.
    @labels: true labels of the samples
    @probs: predicted label probabilities of the samples
    '''
    if mode:
        rl = label_ranking_loss(labels, probs)
    else:
        rl = np.mean(list(map(_ranking_loss, probs, labels)))
    return rl
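# Hedged sketch of a per-sample pairwise ranking loss, i.e. what a helper like the
# (not shown) _ranking_loss above plausibly computes: the fraction of
# (relevant, irrelevant) label pairs where the irrelevant label scores at least as
# high as the relevant one. Names and toy data here are illustrative only.
import numpy as np
from sklearn.metrics import label_ranking_loss

def pairwise_ranking_loss(probs, labels):
    labels = np.asarray(labels, dtype=bool)
    pos, neg = np.asarray(probs)[labels], np.asarray(probs)[~labels]
    if len(pos) == 0 or len(neg) == 0:
        return 0.0  # degenerate case: no (relevant, irrelevant) pairs to compare
    # count pairs where an irrelevant label outranks (or ties) a relevant one
    bad_pairs = sum(n >= p for p in pos for n in neg)
    return bad_pairs / (len(pos) * len(neg))

labels = [[1, 0, 0], [0, 1, 1]]
probs = [[0.2, 0.7, 0.1], [0.9, 0.8, 0.4]]
manual = np.mean([pairwise_ranking_loss(p, l) for p, l in zip(probs, labels)])
print(manual, label_ranking_loss(labels, probs))  # both print 0.75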
def writeall(y_true, y_score, filename):
    fp = open(str(filename) + ".txt", "w")
    fp.write("Classification Report:\n")
    fp.write(mlc_classification_report(y_true, y_score))
    fp.write("\nHamming loss (lower is better [0,1]): " + str(hamming_loss(y_true, y_score)))
    fp.write("\nAccuracy score (higher is better [0,1]): " + str(mlc_accuracy_score(y_true, y_score)))
    fp.write("\nJaccard similarity score (higher is better [0,1]): "
             + str(mlc_jaccard_similarity_score(y_true, y_score)))
    fp.write("\nF1 score: " + str(mlc_f1score(y_true, y_score)))
    fp.write("\nSubset accuracy (higher is better [0,1]): " + str(mlc_subset_accuracy(y_true, y_score)))
    fp.write("\nAverage precision score (higher is better [0,1]): "
             + str(average_precision_score(y_true, y_score)))
    fp.write("\nRanking loss (lower is better [0,1]): " + str(label_ranking_loss(y_true, y_score)))
    fp.write("\nAverage Micro Precision: " + str(precision_score(y_true, y_score, average='micro')))
    fp.write("\nAverage Micro Recall: " + str(recall_score(y_true, y_score, average='micro')))
    fp.close()
def get_avg_results(hat_y, y):
    values = {}
    values['avg_precision_micro'] = average_precision_score(y, hat_y, average='micro')
    # values['avg_precision_macro'] = average_precision_score(y, hat_y, average='macro')
    values['roc_auc_score_micro'] = roc_auc_score(y, hat_y, average='micro')
    # values['roc_auc_score_macro'] = roc_auc_score(y, hat_y, average='macro')
    values['coverage_error'] = coverage_error(y, hat_y)
    values['label_ranking_average_precision_score'] = label_ranking_average_precision_score(y, hat_y)
    values['label_ranking_loss'] = label_ranking_loss(y, hat_y)
    return values
def eval_fold(train_idxs, test_idxs):
    measures = [0, 0, 0, 0, 0, 0]
    fold_classifier = clone(classifier)
    X_train, X_test = split_training_test(X, train_idxs, test_idxs)
    Y_train, Y_test = Y[train_idxs], Y[test_idxs]
    fold_classifier.fit(X_train, Y_train)
    Y_pred = fold_classifier.predict(X_test)
    measures[0] = f1_score(Y_test, Y_pred, average='macro')
    measures[1] = f1_score(Y_test, Y_pred, average='micro')
    measures[2] = accuracy_score(Y_test, Y_pred)
    measures[3] = label_ranking_loss(Y_test, Y_pred)
    measures[4] = hamming_loss(Y_test, Y_pred)
    measures[5] = zero_one_loss(Y_test, Y_pred)
    return np.array(measures)
def multi_label_evaluate(y, y_prob, threshold):
    statistics = Statistics()
    y_pred = (y_prob >= threshold).astype(int)
    y_pred_50 = (y_prob >= 0.5).astype(int)

    ranking_loss = label_ranking_loss(y, y_pred)
    lraps = label_ranking_average_precision_score(y, y_pred)
    ranking_loss_50 = label_ranking_loss(y, y_pred_50)
    lraps_50 = label_ranking_average_precision_score(y, y_pred_50)
    f1_macro = f1_score(y, y_pred, average='macro')
    f1_macro_50 = f1_score(y, y_pred_50, average='macro')

    statistics.update_statistics("Multi-Label", "Ranking Loss", ranking_loss)
    statistics.update_statistics("Multi-Label", "Ranking Precision", lraps)
    statistics.update_statistics("Multi-Label", "Ranking Loss (t=0.5)", ranking_loss_50)
    statistics.update_statistics("Multi-Label", "Ranking Precision (t=0.5)", lraps_50)
    statistics.update_statistics("Multi-Label", "Macro F1", f1_macro)
    statistics.update_statistics("Multi-Label", "Macro F1 (t=0.5)", f1_macro_50)

    try:
        auc_macro = roc_auc_score(y, y_pred, average='macro')
        auc_macro_50 = roc_auc_score(y, y_pred_50, average='macro')
        auc_pr_macro = roc_auc_score(y, y_prob, average='macro')
        statistics.update_statistics("Multi-Label", "Macro AUC", auc_macro)
        statistics.update_statistics("Multi-Label", "Macro AUC (t=0.5)", auc_macro_50)
        statistics.update_statistics("Multi-Label", "Macro AUC (Pr)", auc_pr_macro)
    except ValueError:
        statistics.update_statistics("Multi-Label", "Macro AUC", np.NaN)
        statistics.update_statistics("Multi-Label", "Macro AUC (t=0.5)", np.NaN)
        statistics.update_statistics("Multi-Label", "Macro AUC (Pr)", np.NaN)

    return statistics
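# Small side sketch (toy data, illustrative only): sklearn's ranking metrics accept
# continuous scores, so passing thresholded 0/1 predictions, as multi_label_evaluate
# does, collapses the ranking into two tied groups and typically yields a coarser
# (often higher) loss than scoring the probabilities directly.
import numpy as np
from sklearn.metrics import label_ranking_loss

y = np.array([[1, 0, 1, 0]])
y_prob = np.array([[0.9, 0.6, 0.55, 0.1]])
print(label_ranking_loss(y, y_prob))                       # 0.25: one of four pairs mis-ordered
print(label_ranking_loss(y, (y_prob >= 0.5).astype(int)))  # 0.5: ties created by thresholding count against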
def evaluate(predictions, labels, threshold=0.4, multi_label=True):
    '''
    True Positive  : Label 1, Prediction 1
    False Positive : Label 0, Prediction 1
    False Negative : Label 1, Prediction 0
    True Negative  : Label 0, Prediction 0

    Precision : TP / (TP + FP)
    Recall    : TP / (TP + FN)
    F Score   : 2.P.R / (P + R)
    Ranking Loss : The average fraction of label pairs that are incorrectly ordered
                   by the prediction scores.
    Hamming Loss : The fraction of labels that are incorrectly predicted
                   (Hamming distance between predictions and labels).
    '''
    assert predictions.shape == labels.shape, "Shapes: %s, %s" % (predictions.shape, labels.shape,)
    metrics = dict()
    if not multi_label:
        metrics['bae'] = BAE(labels, predictions)
        labels, predictions = np.argmax(labels, axis=1), np.argmax(predictions, axis=1)
        metrics['accuracy'] = accuracy_score(labels, predictions)
        metrics['micro_precision'], metrics['micro_recall'], metrics['micro_f1'], _ = \
            precision_recall_fscore_support(labels, predictions, average='micro')
        metrics['macro_precision'], metrics['macro_recall'], metrics['macro_f1'], metrics['coverage'], \
            metrics['average_precision'], metrics['ranking_loss'], metrics['pak'], metrics['hamming_loss'] \
            = 0, 0, 0, 0, 0, 0, 0, 0
    else:
        metrics['coverage'] = coverage_error(labels, predictions)
        metrics['average_precision'] = label_ranking_average_precision_score(labels, predictions)
        metrics['ranking_loss'] = label_ranking_loss(labels, predictions)
        for i in range(predictions.shape[0]):
            predictions[i, :][predictions[i, :] >= threshold] = 1
            predictions[i, :][predictions[i, :] < threshold] = 0
        metrics['bae'] = 0
        metrics['patk'] = patk(predictions, labels)
        metrics['micro_precision'], metrics['micro_recall'], metrics['micro_f1'], metrics['macro_precision'], \
            metrics['macro_recall'], metrics['macro_f1'] = bipartition_scores(labels, predictions)
    return metrics
def ranking_loss(self):
    # note: assigning to self.ranking_loss shadows this method on the instance
    self.ranking_loss = metrics.label_ranking_loss(self.ground_truth, self.predictions_raw)
    return 'Ranking Loss: ' + str(self.ranking_loss)
def multilabel_metrics(pred_list, verbose, extra_vars, split):
    """
    Multilabel classification metrics.

    See the multilabel ranking metrics in the sklearn library for more info:
    http://scikit-learn.org/stable/modules/model_evaluation.html#multilabel-ranking-metrics

    :param pred_list: dictionary of hypothesis sentences
    :param verbose: if greater than 0 the metric measures are printed out
    :param extra_vars: extra variables
                       extra_vars['word2idx'] - dictionary mapping from words to indices
                       extra_vars['references'] - list of GT labels
    :param split: split on which we are evaluating
    :return: dictionary of multilabel metrics
    """
    from sklearn import metrics as sklearn_metrics

    word2idx = extra_vars[split]['word2idx']
    # check if an additional dictionary matching raw to basic and general labels is provided;
    # in that case a more general evaluation will be considered
    raw2basic = extra_vars[split].get('raw2basic', None)
    if raw2basic is not None:
        logging.info('Applying general evaluation with raw2basic dictionary.')

    if raw2basic is None:
        n_classes = len(word2idx)
    else:
        basic_values = set(raw2basic.values())
        n_classes = len(basic_values)
    n_samples = len(pred_list)

    # Create prediction matrix
    y_pred = np.zeros((n_samples, n_classes))
    for i_s, sample in list(enumerate(pred_list)):
        for word in sample:
            if raw2basic is None:
                y_pred[i_s, word2idx[word]] = 1
            else:
                word = word.strip()
                y_pred[i_s, raw2basic[word]] = 1

    # Prepare GT
    gt_list = extra_vars[split]['references']
    if raw2basic is None:
        y_gt = np.array(gt_list)
    else:
        idx2word = {v: k for k, v in iteritems(word2idx)}
        y_gt = np.zeros((n_samples, n_classes))
        for i_s, sample in list(enumerate(gt_list)):
            for raw_idx, is_active in list(enumerate(sample)):
                if is_active:
                    word = idx2word[raw_idx].strip()
                    y_gt[i_s, raw2basic[word]] = 1

    # Compute Coverage Error
    coverr = sklearn_metrics.coverage_error(y_gt, y_pred)
    # Compute Label Ranking Average Precision
    avgprec = sklearn_metrics.label_ranking_average_precision_score(y_gt, y_pred)
    # Compute Label Ranking Loss
    rankloss = sklearn_metrics.label_ranking_loss(y_gt, y_pred)
    # Compute Precision, Recall and F1 score
    precision, recall, f1, _ = sklearn_metrics.precision_recall_fscore_support(y_gt, y_pred,
                                                                               average='micro')
    if verbose > 0:
        logging.info('"coverage_error" (best: avg labels per sample = %f): %f'
                     % (float(np.sum(y_gt)) / float(n_samples), coverr))
        logging.info('Label Ranking "average_precision" (best: 1.0): %f' % avgprec)
        logging.info('Label "ranking_loss" (best: 0.0): %f' % rankloss)
        logging.info('precision: %f' % precision)
        logging.info('recall: %f' % recall)
        logging.info('f1: %f' % f1)

    return {'coverage_error': coverr,
            'average_precision': avgprec,
            'ranking_loss': rankloss,
            'precision': precision,
            'recall': recall,
            'f1': f1}
def test_label_ranking_loss():
    assert_almost_equal(label_ranking_loss([[0, 1]], [[0.25, 0.75]]), 0)
    assert_almost_equal(label_ranking_loss([[0, 1]], [[0.75, 0.25]]), 1)

    assert_almost_equal(label_ranking_loss([[0, 0, 1]], [[0.25, 0.5, 0.75]]), 0)
    assert_almost_equal(label_ranking_loss([[0, 1, 0]], [[0.25, 0.5, 0.75]]), 1 / 2)
    assert_almost_equal(label_ranking_loss([[0, 1, 1]], [[0.25, 0.5, 0.75]]), 0)
    assert_almost_equal(label_ranking_loss([[1, 0, 0]], [[0.25, 0.5, 0.75]]), 2 / 2)
    assert_almost_equal(label_ranking_loss([[1, 0, 1]], [[0.25, 0.5, 0.75]]), 1 / 2)
    assert_almost_equal(label_ranking_loss([[1, 1, 0]], [[0.25, 0.5, 0.75]]), 2 / 2)

    # Undefined metrics - the ranking doesn't matter
    assert_almost_equal(label_ranking_loss([[0, 0]], [[0.75, 0.25]]), 0)
    assert_almost_equal(label_ranking_loss([[1, 1]], [[0.75, 0.25]]), 0)
    assert_almost_equal(label_ranking_loss([[0, 0]], [[0.5, 0.5]]), 0)
    assert_almost_equal(label_ranking_loss([[1, 1]], [[0.5, 0.5]]), 0)

    assert_almost_equal(label_ranking_loss([[0, 0, 0]], [[0.5, 0.75, 0.25]]), 0)
    assert_almost_equal(label_ranking_loss([[1, 1, 1]], [[0.5, 0.75, 0.25]]), 0)
    assert_almost_equal(label_ranking_loss([[0, 0, 0]], [[0.25, 0.5, 0.5]]), 0)
    assert_almost_equal(label_ranking_loss([[1, 1, 1]], [[0.25, 0.5, 0.5]]), 0)

    # Non-trivial case
    assert_almost_equal(label_ranking_loss([[0, 1, 0], [1, 1, 0]],
                                           [[0.1, 10., -3], [0, 1, 3]]),
                        (0 + 2 / 2) / 2.)

    assert_almost_equal(label_ranking_loss([[0, 1, 0], [1, 1, 0], [0, 1, 1]],
                                           [[0.1, 10, -3], [0, 1, 3], [0, 2, 0]]),
                        (0 + 2 / 2 + 1 / 2) / 3.)

    assert_almost_equal(label_ranking_loss([[0, 1, 0], [1, 1, 0], [0, 1, 1]],
                                           [[0.1, 10, -3], [3, 1, 3], [0, 2, 0]]),
                        (0 + 2 / 2 + 1 / 2) / 3.)

    # Sparse csr matrices
    assert_almost_equal(label_ranking_loss(csr_matrix(np.array([[0, 1, 0], [1, 1, 0]])),
                                           [[0.1, 10, -3], [3, 1, 3]]),
                        (0 + 2 / 2) / 2.)
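# Worked check (values taken from the second non-trivial assertion above) of how the
# expected loss (0 + 2/2 + 1/2) / 3 arises; this is a hand computation added for
# illustration, not part of the original test file.
#
# Sample 1: y = [0, 1, 0], scores = [0.1, 10, -3]
#   relevant {1} vs irrelevant {0, 2}: 10 > 0.1 and 10 > -3  -> 0 of 2 pairs wrong -> 0
# Sample 2: y = [1, 1, 0], scores = [0, 1, 3]
#   relevant {0, 1} vs irrelevant {2}: 0 < 3 and 1 < 3       -> 2 of 2 pairs wrong -> 2/2
# Sample 3: y = [0, 1, 1], scores = [0, 2, 0]
#   relevant {1, 2} vs irrelevant {0}: 2 > 0, but 0 ties 0   -> 1 of 2 pairs wrong -> 1/2
# Average over samples: (0 + 1 + 0.5) / 3 = 0.5
from sklearn.metrics import label_ranking_loss
print(label_ranking_loss([[0, 1, 0], [1, 1, 0], [0, 1, 1]],
                         [[0.1, 10, -3], [0, 1, 3], [0, 2, 0]]))  # 0.5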
nlraprecision = []
plrloss = []
plraprecision = []
nscoreone = 0
nscoretwo = 0
correctness = 0
for i in range(0, 10):
    p = classifier2.predictor()
    p.learnPredictor()
    n_predicted = p.predict()
    correct = p.mlb.transform(util2.getCorrectGenres(p.testExamples))
    ny_score = np.array(n_predicted)
    y_true = np.array(correct)
    nscoreone += label_ranking_loss(y_true, ny_score)
    nscoretwo += label_ranking_average_precision_score(y_true, ny_score)
    correctness += util2.printCorrectness(p.mlb, p.testExamples, n_predicted, correct)

print "LABEL RANKING LOSS: " + str(float(nscoreone) / 10)
print "LABEL RANKING AVERAGE PRECISION: " + str(float(nscoretwo) / 10)
print "CORRECTNESS: " + str(float(correctness) / 10)
# util2.printAccuracyByGenre(p.mlb, p.testExamples, n_predicted, correct)
# util2.printOutput(p.mlb, p.testExamples, n_predicted, correct)
# print "=========="
# print "PERCENT RESULTS"
# print "LABEL RANKING LOSS:"