def make_evaluation_report(predY, testY):
    ############Configuration ################
    average_metric = 'micro'  # {None,'micro', 'macro','weighted','samples'}

    ############Configuration end#############

    #1) Hamming loss
    hl = hamming_loss(testY, predY)
    #2) one_error (placeholder constant; a sketch of the usual computation follows this function)
    one_error = 0.00000000000000001
    #3) Coverage (coverage_error needs dense arrays, so fall back through sparse conversions)
    try:
        coverage = coverage_error(testY.toarray(), predY.toarray())
    except Exception:
        try:
            coverage = coverage_error(testY, predY.todense())
        except Exception:
            coverage = 0.0
    #4) F1-score and precision
    f1score = metrics.f1_score(testY, predY, average=average_metric)
    precision = metrics.precision_score(testY, predY, average=average_metric)

    res = [hl, one_error, coverage, f1score, precision]
    #     print(" %-12s %-12s %-12s %-12s %-12s %-12s" % ('modelname','hamming','one_error','coverage','f1score','precision'))

    #     print(" %-12s %-12f %-12f %-12f %-12f %-12f" % ('modelname',res[0],res[1],res[2],res[3],res[4]))
    return res
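
The one_error value above is only a tiny placeholder constant. For reference, a minimal sketch of how one-error is usually computed (the fraction of samples whose single top-scored label is not among the true labels); the helper name is illustrative only, and it assumes dense 0/1 ground truth plus a real-valued score matrix:

import numpy as np

def one_error_sketch(y_true, y_score):
    # share of samples whose highest-scored label is not a relevant label
    y_true = np.asarray(y_true)
    top = np.argmax(np.asarray(y_score), axis=1)
    return float(np.mean(y_true[np.arange(len(top)), top] == 0))
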
Example #2
def coverage_error_scores(arr, arr1, arr2, arr3):

    a4 = coverage_error(arr, arr1)
    b4 = coverage_error(arr, arr2)
    c4 = coverage_error(arr, arr3)

    print("Coverage Error Scores for the three classifiers are")
    print("Using Binary Relevance: " + str(a4))
    print("Using Classifier Chain: " + str(b4))
    print("Using LabelPowerSet: " + str(c4))
    print("\n")
    def on_epoch_end(self, epoch, logs={}):
        result = self.model.predict(self.x_test)
        roc_auc = metrics.roc_auc_score(self.y_test.ravel(), result.ravel())
        print('\r Micro val_roc_auc: %s' % (str(round(roc_auc, 4))),
              end=100 * ' ' + '\n')

        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        for i in range(14):
            fpr[i], tpr[i], _ = roc_curve(self.y_test[:, i], result[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])
            print("Class " + str(i) + "auc = " + str(roc_auc[i]))

        macro = sum(roc_auc.values()) / 14
        print('\r Macro val_roc_auc: %s' % (str(round(macro, 4))),
              end=100 * ' ' + '\n')
        value = coverage_error(self.y_test, result)
        print('\r coverage_error: %s' % (str(round(value, 4))),
              end=100 * ' ' + '\n')

        value = label_ranking_loss(self.y_test, result)
        print('\r label_ranking_loss: %s' % (str(round(value, 4))),
              end=100 * ' ' + '\n')

        roc_auc = label_ranking_average_precision_score(self.y_test, result)
        print('\r label_ranking_average_precision_score: %s' %
              (str(round(roc_auc, 4))),
              end=100 * ' ' + '\n')

        return
Example #4
def use_sklearn_ml_knn():
    """

    :return:
    """

    base_path = os.getcwd()
    # train_x = np.load(os.path.join(base_path, 'dataset/train_x.npy'), allow_pickle=True)
    # train_y = np.load(os.path.join(base_path, 'dataset/train_y.npy'), allow_pickle=True)

    train_x = np.load(os.path.join(base_path, 'my_dataset/train_x.npy'),
                      allow_pickle=True)
    train_y = np.load(os.path.join(base_path, 'my_dataset/train_y.npy'),
                      allow_pickle=True)

    new_train_y = []
    for tup in train_y:
        tmp = []
        for label in tup:
            if label == 0:
                tmp.append(0)
            else:
                tmp.append(1)
        new_train_y.append(tmp)

    # test_x = np.load('dataset/test_x.npy', allow_pickle=True)
    # test_y = np.load('dataset/test_y.npy', allow_pickle=True)

    test_x = np.load('my_dataset/test_x.npy', allow_pickle=True)
    test_y = np.load('my_dataset/test_y.npy', allow_pickle=True)
    new_test_y = []
    for tup in test_y:
        tmp = []
        for label in tup:
            if label == 0:
                tmp.append(0)
            else:
                tmp.append(1)
        new_test_y.append(tmp)

    new_test_y = np.array(new_test_y)

    classifier = MLkNN2(train_x, np.array(new_train_y), k=10)

    # classifier.fit(train_x, np.array(new_train_y))
    classifier.fit()
    predictions = classifier.predict(test_x)
    predictions = convert_prediction(predictions)

    # hamming_loss = HammingLoss(new_test_y, predictions)
    h_loss = hamming_loss(new_test_y, predictions)
    z = zero_one_loss(new_test_y, predictions)
    c = coverage_error(new_test_y, predictions)
    r = label_ranking_loss(new_test_y, predictions)
    a = average_precision_score(new_test_y, predictions)
    print('hamming_loss = ', h_loss)
    print('0-1_loss = ', z)
    print('cover_loss = ', c)
    print('rank_loss = ', r)
    print('average_precision = ', a)
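
Note that coverage_error, label_ranking_loss and average_precision_score are defined on continuous scores; passing hard 0/1 predictions (as above) ties every label that shares the same value. A small illustration with made-up values of how ties inflate coverage under sklearn's tie handling (the same behaviour exercised by the tie-handling test later in this listing):

import numpy as np
from sklearn.metrics import coverage_error

y_true = np.array([[0, 1, 0]])
scores = np.array([[0.2, 0.9, 0.1]])   # graded scores rank the true label first
hard = (scores >= 0.5).astype(int)     # hard predictions tie the two zero entries

print(coverage_error(y_true, scores))  # 1.0
print(coverage_error(y_true, hard))    # 1.0 (the true label is still uniquely on top)
print(coverage_error(y_true, np.array([[0, 0, 0]])))  # 3.0: all labels tied
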
Example #5
def binary(X_train, X_test, y_train, y_test):

    print("Binary Relevance")
    model = BinaryRelevance(classifier=SVC(),
                            require_dense=[True, True]).fit(X_train, y_train)
    y_pred = model.predict(X_test)

    hamming = hamming_loss(y_test, y_pred)
    subset_accuracy = accuracy_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred, average='micro')
    precision = precision_score(y_test, y_pred, average='micro')
    f1 = f1_score(y_test, y_pred, average='micro')
    coverage = coverage_error(y_test, y_pred.toarray())
    aps = label_ranking_average_precision_score(y_test, y_pred.toarray())
    rankingloss = label_ranking_loss(y_test, y_pred.toarray())
    print("Hamming: " + str(hamming))
    print("Subset Accuracy: " + str(subset_accuracy))
    print("Recall: " + str(recall))
    print("Precision: " + str(precision))
    print("F1: " + str(f1))
    print("Coverage error: " + str(coverage))
    print("Average Precision Score: " + str(aps))
    print("Ranking Loss: " + str(rankingloss))
    print("\n")

    return hamming, subset_accuracy, recall, precision, f1, coverage, aps, rankingloss
def powerset(X_train, X_test, y_train, y_test, classifier):

    print("Label Powerset")
    model = chooseClassifier(classifier, X_train, y_train)
    y_pred = model.predict(X_test)

    hamming = hamming_loss(y_test, y_pred)
    subset_accuracy = accuracy_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred, average='micro')
    precision = precision_score(y_test, y_pred, average='micro')
    f1 = f1_score(y_test, y_pred, average='micro')
    coverage = coverage_error(y_test, y_pred.toarray())
    aps = label_ranking_average_precision_score(y_test, y_pred.toarray())
    rankingloss = label_ranking_loss(y_test, y_pred.toarray())
    print("Hamming: " + str(hamming))
    print("Subset Accuracy: " + str(subset_accuracy))
    print("Recall: " + str(recall))
    print("Precision: " + str(precision))
    print("F1: " + str(f1))
    print("Coverage error: " + str(coverage))
    print("Average Precision Score: " + str(aps))
    print("Ranking Loss: " + str(rankingloss))
    print("\n")

    return hamming, subset_accuracy, recall, precision, f1, coverage, aps, rankingloss
Example #7
def evaluation(y_pred, y_prob, y_true):
    coverage = coverage_error(y_true, y_prob)
    hamming = hamming_loss(y_true, y_pred)
    ranking_loss = label_ranking_loss(y_true, y_prob)

    f1_macro = metrics.f1_score(y_true, y_pred, average='macro')
    f1_micro = metrics.f1_score(y_true, y_pred, average='micro')

    acc = 0
    for i in range(y_true.shape[0]):
        # per-sample Jaccard index; jaccard_similarity_score was renamed jaccard_score in newer scikit-learn
        acc += jaccard_similarity_score(y_true.iloc[i, :], y_pred.iloc[i, :])
    acc = acc / y_true.shape[0]

    zero_one = zero_one_loss(y_true, y_pred)  # 0-1 error

    performance = {
        "coverage_error": coverage,
        "ranking_loss": ranking_loss,
        "hamming_loss": hamming,
        "f1_macro": f1_macro,
        "f1_micro": f1_micro,
        "Jaccard_Index": acc,
        "zero_one_error": zero_one
    }
    return performance
Example #8
def metric(pred_prob, label, inclusion_index_set, threshold=0.5):
    # label, pred_prob structure: [n_classes, n_samples]
    included_pred_prob = list()
    included_label = list()
    for index in inclusion_index_set:
        included_pred_prob.append(pred_prob[index])
        included_label.append(label[index])
    prob = np.array(included_pred_prob).transpose()
    pred = np.array(included_pred_prob).transpose() > threshold
    true = np.array(included_label).transpose()

    micro_auc = roc_auc_score(true, prob, average='micro')
    macro_auc = roc_auc_score(true, prob, average='macro')
    micro_f1 = f1_score(true, pred, average='micro')
    macro_f1 = f1_score(true, pred, average='macro')
    micro_avg_precision = average_precision_score(true, prob, average='micro')
    macro_avg_precision = average_precision_score(true, prob, average='macro')
    coverage = coverage_error(true, prob)
    ranking_loss = label_ranking_loss(true, prob)
    hamming = hamming_loss(true, pred)
    fuse = np.concatenate([prob[:, :, np.newaxis], true[:, :, np.newaxis]], axis=2).transpose([1, 0, 2])
    top_1_num = top_k_num(fuse, 1)
    top_3_num = top_k_num(fuse, 3)
    top_5_num = top_k_num(fuse, 5)
    top_10_num = top_k_num(fuse, 10)
    top_20_num = top_k_num(fuse, 20)
    top_30_num = top_k_num(fuse, 30)
    top_40_num = top_k_num(fuse, 40)
    top_50_num = top_k_num(fuse, 50)

    return macro_auc, micro_auc, micro_f1, macro_f1, micro_avg_precision, macro_avg_precision, coverage, ranking_loss, \
        hamming, top_1_num, top_3_num, top_5_num, top_10_num, top_20_num, top_30_num, top_40_num, top_50_num
Example #9
    def update_from_numpy(self, preds, labels):
        for pred, label, cls in zip(zip(*preds), zip(*labels), self.confusion):
            true_pos = np.sum([p and l for p, l in zip(pred, label)])
            true_neg = np.sum([not p and not l for p, l in zip(pred, label)])
            false_pos = np.sum([p and not l for p, l in zip(pred, label)])
            false_neg = np.sum([not p and l for p, l in zip(pred, label)])

            self.num_true_positives += true_pos
            self.num_true_negatives += true_neg
            self.num_false_positives += false_pos
            self.num_false_negatives += false_neg

            cls["true_pos"] += true_pos
            cls["true_neg"] += true_neg
            cls["false_pos"] += false_pos
            cls["false_neg"] += false_neg
            cls["support"] += true_pos + false_neg

        n = len(preds)

        self.n += n
        self.ranking_loss += label_ranking_loss(labels, preds) * n
        self.coverage += coverage_error(labels, preds) * n
        self.average_precision += label_ranking_average_precision_score(
            labels, preds) * n

        for pred, label in zip(preds, labels):
            # one-error: count samples whose top-scored label is not a true label
            top_prediction = np.argsort(pred)[-1]
            label = np.argwhere(label)

            if top_prediction not in label:
                self.one_error += 1
Example #10
    def on_epoch_end(self, epoch, logs={}):
        result = self.model.predict_generator(self.val_gen,
                                              steps=self.val_gen.n / BATCH,
                                              verbose=1)

        print(self.y[0])
        print(result[0])
        roc_auc = metrics.roc_auc_score(self.y.ravel(), result.ravel())
        print('\r Micro val_roc_auc: %s' % (str(round(roc_auc,4))), end=100*' '+'\n')

        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        for i in range(14):
            fpr[i], tpr[i], _ = roc_curve(self.y[:, i], result[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])
            print("Class " + str(i) + "auc = " + str(roc_auc[i]))


        value = coverage_error(self.y, result)
        print('\r coverage_error: %s' % (str(round(value,4))), end=100*' '+'\n')

        value = label_ranking_loss(self.y, result)
        print('\r label_ranking_loss: %s' % (str(round(value, 4))), end=100 * ' ' + '\n')

        roc_auc = label_ranking_average_precision_score(self.y, result)
        print('\r label_ranking_average_precision_score: %s' % (str(round(roc_auc,4))), end=100*' '+'\n')

        return
def evaluate(predictions, labels, threshold=0.5):
    '''
        True Positive  :  Label : 1, Prediction : 1
        False Positive :  Label : 0, Prediction : 1
        False Negative :  Label : 1, Prediction : 0
        True Negative  :  Label : 0, Prediction : 0
        Precision      :  TP/(TP + FP)
        Recall         :  TP/(TP + FN)
        F Score        :  2.P.R/(P + R)
        Ranking Loss   :  The average number of label pairs that are incorrectly ordered given the predictions
        Hamming Loss   :  The fraction of labels that are incorrectly predicted (the Hamming distance between predictions and labels)
    '''
    assert predictions.shape == labels.shape, "Shapes: %s, %s" % (
        predictions.shape,
        labels.shape,
    )
    metrics = dict()
    # print('pre', predictions)
    # print('label', labels)
    metrics['coverage'] = coverage_error(labels, predictions)
    metrics['average_precision'] = label_ranking_average_precision_score(
        labels, predictions)
    metrics['ranking_loss'] = label_ranking_loss(labels, predictions)

    for i in range(predictions.shape[0]):
        predictions[i, :][predictions[i, :] >= threshold] = 1
        predictions[i, :][predictions[i, :] < threshold] = 0

    metrics['bae'] = 0
    metrics['patk'] = patk(predictions, labels)
    metrics['hamming_loss'] = hamming_loss(y_pred=predictions, y_true=labels)
    metrics['micro_precision'], metrics['micro_recall'], metrics['micro_f1'], metrics['macro_precision'], \
        metrics['macro_recall'], metrics['macro_f1'] = bipartition_scores(labels, predictions)

    return metrics
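
As a quick numeric check of the definitions in the docstring above (the toy arrays are made up, not from the original snippet):

import numpy as np
from sklearn.metrics import hamming_loss, precision_score, recall_score

y_true = np.array([[1, 0, 1],
                   [0, 1, 0]])
y_pred = np.array([[1, 1, 0],
                   [0, 1, 0]])

print(hamming_loss(y_true, y_pred))                      # 2 wrong slots out of 6 -> 0.333...
print(precision_score(y_true, y_pred, average='micro'))  # TP=2, FP=1 -> 0.666...
print(recall_score(y_true, y_pred, average='micro'))     # TP=2, FN=1 -> 0.666...
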
Example #12
    def cross_validation(self, features):
        '''
        Standalone validation of an untrained classifier.
        Splits the features into a training set and a validation set.
        Warning: overwrites any existing trained model.
        '''
        values, classes, categories = self._features_to_values(features)
        values = np.nan_to_num(values)
        n_classes = len(categories)

        (training_values, test_values, training_classes,
         test_classes) = train_test_split(values,
                                          classes,
                                          test_size=self.validation_split,
                                          random_state=self.seed)

        self._train(np.array(training_values), training_classes, n_classes)
        predictions = self._predict(np.array(test_values))
        predicted_classes = np_utils.probas_to_classes(predictions)
        binary_labels = np_utils.to_categorical(test_classes)

        # compute the metrics
        accuracy = accuracy_score(test_classes, predicted_classes)
        precision_score = average_precision_score(binary_labels, predictions)
        error = coverage_error(binary_labels, predictions)
        loss = label_ranking_loss(binary_labels, predictions)
        label_precision = label_ranking_average_precision_score(
            binary_labels, predictions)

        real_cat = categories[test_classes]
        predicted_cat = categories[predicted_classes]
        return (real_cat, predicted_cat, accuracy, precision_score, error,
                loss, label_precision)
def evaluate_model(model, X_test, y_test, category_names):
    """
    Evaluate the performance of the model on test data using coverage error and classification report

    Args:
        model: (sklearn.model_selection.GridSearchCV) estimator created from build_model()
        X_test: (pandas.DataFrame) containing the features for the test data
        y_test: (pandas.DataFrame) containing the multilabel targets for the test data
        category_names: (list) containing the target names from load_data()
    Returns:
        None
    """
    try:
        y_pred = model.predict(X_test).todense()
        y_test_avg_labels = round(np.mean(y_test.sum(axis=1)), 2)

        print("Printing classification report...\n")

        for i, col in enumerate(category_names):
            ytrue = y_test[col]
            ypred = y_pred[:, i]
            print(col)
            print(classification_report(ytrue, ypred))
            print('-' * 60)

        print("\n Printing coverage error...\n")
        print(round(coverage_error(y_test, y_pred), 2))
        print(
            f"\n Average number of true labels per sample in test sample: {y_test_avg_labels}"
        )
    except Exception as err:
        raise Exception("Could not evaluate model.") from err
Example #14
def compute_evaluation(true_matrix, predict_matrix):
    h = hamming_loss(true_matrix, predict_matrix)
    z = zero_one_loss(true_matrix, predict_matrix)
    c = coverage_error(true_matrix, predict_matrix)

    result = [h, z, c]

    return result
Example #15
def evaluate_ouput(y_test, output):
    metrics = dict()
    metrics['coverage'] = coverage_error(y_test, output)
    metrics['average_precision'] = label_ranking_average_precision_score(
        y_test, output)
    metrics['ranking_loss'] = label_ranking_loss(y_test, output)
    metrics['one_error'] = OneError(output, y_test)

    return metrics
Example #16
def compute_evaluation(true_matrix, predict_matrix):
    h = hamming_loss(true_matrix, predict_matrix)
    z = zero_one_loss(true_matrix, predict_matrix)
    c = coverage_error(true_matrix, predict_matrix) - 1  # sklearn's coverage is 1 higher than the usual literature definition
    r = label_ranking_loss(true_matrix, predict_matrix)
    a = average_precision_score(true_matrix, predict_matrix)

    result = [h, z, c, r, a]
    return result
Example #17
def coverage(vote, target, no_labels):
    vote0 = zeros(no_labels)
    target0 = zeros(no_labels)
    for k, v in vote.items():
        vote0[k] = v
    for t in target:
        target0[t] = 1.0
    vote0 = vote0.reshape((1, no_labels))
    target0 = target0.reshape((1, no_labels))
    return coverage_error(target0, vote0)
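
A hypothetical toy call of the helper above, with votes given as a {label_index: score} dict and target as the list of relevant label indices (values are made up):

# the single relevant label (index 2) carries the highest vote, so coverage is 1.0
print(coverage(vote={0: 0.2, 2: 0.9}, target=[2], no_labels=3))  # 1.0
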
Example #18
def get_score(y_true, y_pred, labels=None):
    scores = {}
    scores["lrap"] = label_ranking_average_precision_score(y_true, y_pred)
    scores["lrloss"] = label_ranking_loss(y_true, y_pred)
    scores["ndcg_score"] = ndcg_score(y_true, y_pred)
    scores["coverage_error"] = coverage_error(y_true, y_pred)
    try:
        scores["hamming_loss"] = hamming_loss(y_true, y_pred)
    except:
        scores["hamming_loss"] = None
    try:
        scores["subset_accuracy"] = accuracy_score(y_true, y_pred)
    except:
        scores["subset_accuracy"] = None

    for avg in [None, "micro", "macro", "weighted", "samples"]:
        if avg:
            avg_suffix = f"_{avg}"
            try:
                (
                    scores[f"precision{avg_suffix}"],
                    scores[f"recall{avg_suffix}"],
                    scores[f"f1{avg_suffix}"],
                    _,
                ) = precision_recall_fscore_support(y_true, y_pred, average=avg)
            except:
                (
                    scores[f"precision{avg_suffix}"],
                    scores[f"recall{avg_suffix}"],
                    scores[f"f1{avg_suffix}"],
                ) = (None, None, None)
            try:
                scores[f"roc_auc{avg_suffix}"] = roc_auc_score(
                    y_true, y_pred, average=avg
                )
            except:
                scores[f"roc_auc{avg_suffix}"] = None
        else:
            try:
                p, r, f, _ = precision_recall_fscore_support(y_true, y_pred)
                scores[f"precision"], scores[f"recall"], scores[f"f1"] = (
                    dict(zip(labels, list(sc))) for sc in (p, r, f)
                )
            except:
                scores[f"precision"], scores[f"recall"], scores[f"f1"] = (
                    None,
                    None,
                    None,
                )
            try:
                scores["roc_auc"] = roc_auc_score(y_true, y_pred)
            except:
                scores["roc_auc"] = None

    return scores
Example #19
 def get_classification_report_2(self, train_y, predicted_score, verbose=1):
     cov_err = metrics.coverage_error(train_y, predicted_score)
     label_rank_avg_prec = metrics.label_ranking_average_precision_score(train_y, predicted_score)
     rank_loss = metrics.label_ranking_loss(train_y, predicted_score)
     log_loss = metrics.log_loss(train_y, predicted_score)
     if verbose:
         print('CoverageError', cov_err)
         print('LabelRankingAvgPrec', label_rank_avg_prec)
         print('LabelRankingLoss', rank_loss)
         print('log_loss', log_loss)
     return [cov_err, label_rank_avg_prec, rank_loss, log_loss]
Example #20
def print_predict(ground_truth, prediction, hyper_params):
    rounded = 4
    AUC_macro = round(roc_auc_score(ground_truth, prediction, average='macro'),
                      rounded)
    AUC_micro = round(roc_auc_score(ground_truth, prediction, average='micro'),
                      rounded)
    Coverage_error = round(
        (coverage_error(ground_truth, prediction)) / ground_truth.shape[1],
        rounded)
    rankloss = round(label_ranking_loss(ground_truth, prediction), rounded)
    One_error = round(one_error(ground_truth, prediction), rounded)
    Precision_at_ks = precision_at_ks(ground_truth, prediction)
    Log_loss = round(log_loss(ground_truth, prediction), rounded)
    Average_precision_score = round(
        average_precision_score(ground_truth, prediction), rounded)

    prediction = np.round(prediction)

    F1_Micro = round(f1_score(ground_truth, prediction, average='micro'),
                     rounded)
    Hamming_loss = round(hamming_loss(ground_truth, prediction), rounded)
    Accuracy = round(accuracy_score(ground_truth, prediction), rounded)
    Recall_score_macro = round(
        recall_score(ground_truth, prediction, average='macro'), rounded)
    Recall_score_micro = round(
        recall_score(ground_truth, prediction, average='micro'), rounded)
    Precision_score_macro = round(
        precision_score(ground_truth, prediction, average='macro'), rounded)
    Precision_score_micro = round(
        precision_score(ground_truth, prediction, average='micro'), rounded)
    Jaccard_score_macro = round(
        jaccard_score(ground_truth, prediction, average='macro'), rounded)
    Jaccard_score_micro = round(
        jaccard_score(ground_truth, prediction, average='micro'), rounded)

    print('Recall_score_macro:   ', Recall_score_macro)
    print('Recall_score_micro:   ', Recall_score_micro)
    print('Precision_score_macro:   ', Precision_score_macro)
    print('Precision_score_micro:   ', Precision_score_micro)
    print('Jaccard_score_macro:   ', Jaccard_score_macro)
    print('Jaccard_score_micro:   ', Jaccard_score_micro)
    print("Accuracy = ", Accuracy)
    print('precision_at_ks: ', Precision_at_ks)
    print('Hamming_loss: ', Hamming_loss)
    print('Log_loss:  ', Log_loss)
    print('Average_precision_score: ', Average_precision_score)
    print('F1_Micro ', F1_Micro)
    print('One_error: ', One_error)
    print('Ranking loss: ', rankloss)
    print('coverage: ', Coverage_error)
    print('AUC-micro:   ', AUC_micro)
    print('AUC-macro:   ', AUC_macro)

    print('\n')
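
The coverage value printed above is divided by the number of labels, so it lies in (0, 1]; a quick toy check of that convention (values made up, not from the original snippet):

import numpy as np
from sklearn.metrics import coverage_error

y_true = np.array([[0, 1, 0, 0]])
y_score = np.array([[0.1, 0.9, 0.3, 0.2]])
# raw coverage error is 1 (the true label is ranked first); divided by 4 labels -> 0.25
print(coverage_error(y_true, y_score) / y_true.shape[1])  # 0.25
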
Example #21
def treino_binarizacao(X, Y):
    labels = [
        'Latitude', 'Longitude', 'DiaSemChuva', 'Precipitacao', 'RiscoFogo',
        'TempBulboSecoEst1', 'TempBulboUmidoEst1', 'UmidadeRelativaEst1',
        'DirecaoVentoEst1', 'VelocidadeVentoNebulosidadeEst1',
        'DistanciaParaEst1', 'TempBulboSecoEst2', 'TempBulboUmidoEst2',
        'UmidadeRelativaEst2', 'DirecaoVentoEst2',
        'VelocidadeVentoNebulosidadeEst2', 'DistanciaParaEst2'
    ]
    mlb = MultiLabelBinarizer()
    Ybin = mlb.fit_transform(Y)
    mlp = neuralnetwork.MLPClassifier(hidden_layer_sizes=(10, 4),
                                      activation='tanh',
                                      solver='lbfgs',
                                      learning_rate='invscaling',
                                      random_state=2818,
                                      max_iter=400,
                                      early_stopping=True)
    x_train, x_test, y_train, y_test = model.train_test_split(X,
                                                              Ybin,
                                                              train_size=0.33)
    mlp.fit(x_train, y_train)
    y_pred = mlp.predict(x_test)
    print("Erro de cobertura:" + str(metrics.coverage_error(y_test, y_pred)))
    print("Precisão média de labels:" +
          str(metrics.label_ranking_average_precision_score(y_test, y_pred)))
    print("Perda de ranks:" + str(metrics.label_ranking_loss(y_test, y_pred)))
    matriz = matriz_confusao(y_test, y_pred)
    results = {
        "Erro de cobertura":
        metrics.coverage_error(y_test, y_pred),
        "Precisão média de labels":
        metrics.label_ranking_average_precision_score(y_test, y_pred),
        "Perda de ranks":
        metrics.label_ranking_loss(y_test, y_pred),
        "Matrizes":
        matriz
    }
    res_df = pd.DataFrame(results)
    res_df.to_csv(
        r"C:\Users\Livnick\Documents\dadosFocos\ResultadosMAcomMatriz2.csv")
Example #22
def Coverage(labels, probs, mode=1):
    '''
    Measures how many top-ranked labels are needed, on average, to cover all of a sample's relevant labels.
    @labels: true labels of the samples
    @probs:  predicted label probabilities of the samples
    '''
    if mode:
        steps = coverage_error(labels, probs)
    else:
        steps = np.mean(list(map(_coverage, probs, labels)))

    return steps
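
The _coverage helper used in the mode == 0 branch is not shown in this example. A minimal per-sample sketch of what it presumably computes (the 1-based rank of the worst-ranked relevant label); this is an assumption about the missing helper, not the original implementation:

import numpy as np

def _coverage_sketch(prob, label):
    # rank (1-based) of the lowest-ranked relevant label for a single sample
    order = np.argsort(-np.asarray(prob))           # label indices, best score first
    relevant = np.flatnonzero(np.asarray(label))
    ranks = [int(np.where(order == idx)[0][0]) + 1 for idx in relevant]
    return max(ranks) if ranks else 0
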
Example #23
 def coverage_error(self):
     """
     The coverage_error function computes the average number of labels
     that have to be included in the final prediction such that all true
     labels are predicted. This is useful if you want to know how many
     top-scored labels you have to predict on average without missing any
     true one. The best value of this metric is thus the average number
     of true labels.
     """
     # note: this assignment replaces the bound method with the computed float
     self.coverage_error = metrics.coverage_error(
         self.ground_truth, self.predictions_raw)
     avg_true_labels = np.count_nonzero(self.ground_truth) / self.ntrials
     ce_message = 'Coverage Error [' + str(avg_true_labels) + ', ~): '
     return ce_message + str(self.coverage_error)
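
As a quick illustration of the property described in the docstring (the best attainable value equals the average number of true labels), a minimal toy check with made-up values:

import numpy as np
from sklearn.metrics import coverage_error

y_true = np.array([[1, 0, 1]])          # two true labels
y_score = np.array([[0.9, 0.2, 0.8]])   # both true labels ranked ahead of the false one
print(coverage_error(y_true, y_score))  # 2.0, i.e. the number of true labels
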
Example #24
def get_avg_results(hat_y, y):
    values = {}
    values['avg_precision_micro'] = average_precision_score(y,
                                                            hat_y,
                                                            average='micro')
    #    values['avg_precision_macro'] = average_precision_score(y, hat_y, average = 'macro')
    values['roc_auc_score_micro'] = roc_auc_score(y, hat_y, average='micro')
    #    values['roc_auc_score_macro'] = roc_auc_score(y, hat_y, average = 'macro')
    values['coverage_error'] = coverage_error(y, hat_y)
    values[
        'label_ranking_average_precision_score'] = label_ranking_average_precision_score(
            y, hat_y)
    values['label_ranking_loss'] = label_ranking_loss(y, hat_y)
    return values
Example #25
 def coverage_error(self):
     """
     The coverage_error function computes the average number of labels
     that have to be included in the final prediction such that all true
     labels are predicted. This is useful if you want to know how many
     top-scored labels you have to predict on average without missing any
     true one. The best value of this metric is thus the average number
     of true labels.
     """
     self.coverage_error = metrics.coverage_error(self.ground_truth,
                                                  self.predictions_raw)
     avg_true_labels = np.count_nonzero(self.ground_truth) / self.ntrials
     ce_message = 'Coverage Error [' + str(avg_true_labels) + ', ~): '
     return ce_message + str(self.coverage_error)
Example #26
def coverage_err(y_true, y_pred):
    """
    Coverage error:
    For every sample, how far down the ranked list of predicted classes must we reach to get all
    actual class labels? The average value of this metric across samples is the coverage error.

    :param y_true: array of shape (n_samples, n_labels)
    :param y_pred: array of shape (n_samples, n_labels)
    :return: coverage_error, float
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    # We want to exclude rows with 0 labels from the function
    rows_to_keep = np.sum(y_true, axis=1) != 0
    return coverage_error(y_true[rows_to_keep], y_pred[rows_to_keep])
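
Why excluding all-zero rows matters: sklearn counts such rows as contributing 0, which pulls the average down. A small comparison using the wrapper above (toy values, not from the original):

import numpy as np
from sklearn.metrics import coverage_error

y_true = np.array([[0, 0], [1, 0]])
y_score = np.array([[0.5, 0.5], [0.9, 0.1]])

print(coverage_error(y_true, y_score))  # 0.5 (the empty row contributes 0)
print(coverage_err(y_true, y_score))    # 1.0 after dropping the empty row
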
Example #27
def _multi_class(y_true=0, y_pred=0):
    a = label_ranking_average_precision_score(y_true, y_pred)
    b = label_ranking_loss(y_true, y_pred)
    c = coverage_error(y_true, y_pred)
    erg = np.zeros((1, 3))
    erg[0, 0] = a
    erg[0, 1] = b
    erg[0, 2] = c

    res_mclass = pd.DataFrame(data=erg,
                              columns=[
                                  'label_ranking_average_precision_score',
                                  'label_ranking_loss', 'coverage_error'
                              ])

    return res_mclass
Example #28
def _generate_classification_reports(y_true, y_pred, target_names=None):
    # Calculate additional stats
    total_accuracy = accuracy_score(y_true, y_pred)
    cov_error = coverage_error(y_true, y_pred)
    lrap = label_ranking_average_precision_score(y_true, y_pred)

    report = metrics.multilabel_prediction_report(y_true, y_pred)
    report += '\n\n'
    report += metrics.multilabel_classification_report(y_true, y_pred, target_names=target_names)
    report += '\n\n'
    report += 'coverage error:  %.3f' % cov_error
    report += '\n'
    report += 'LRAP:            %.3f' % lrap
    report += '\n'
    report += 'total accuracy:  %.3f' % total_accuracy
    return report
Example #29
def eval_metrics(model):

    y_gold = model["target_codec"].transform(model["data"]["test"]["y"])
    y_pred = model["test_predicted"]

    test_res = zero_one_loss(y_gold, y_pred)
    print('Zero_One_loss: %.4f' % test_res)

    test_res = coverage_error(y_gold, y_pred)
    print('coverage_error: %.4f' % test_res)

    test_res = label_ranking_average_precision_score(y_gold, y_pred)
    print('LRAP: %.4f' % test_res)

    test_res = r2_score(y_gold, y_pred)
    print('r2_score: %.4f' % test_res)
def multilabel_metrics(pred_list, verbose, extra_vars, split):
    '''
    Multilabel ranking metrics.
    See the multilabel ranking metrics in the sklearn library for more info:
        http://scikit-learn.org/stable/modules/model_evaluation.html#multilabel-ranking-metrics

    # Arguments
        pred_list, list of predicted label words for each sample
        verbose - if greater than 0 the metric measures are printed out
        extra_vars - extra variables, here:
                extra_vars['word2idx'] - dictionary mapping from words to indices
                extra_vars['references'] - list of GT labels (used as gt_list)
        split - name of the data split being evaluated
    '''
    word2idx = extra_vars[split]['word2idx']
    n_classes = len(word2idx)
    n_samples = len(pred_list)

    # Create prediction matrix
    y_pred = np.zeros((n_samples, n_classes))
    for i_s, sample in enumerate(pred_list):
        for word in sample:
            y_pred[i_s, word2idx[word]] = 1

    gt_list = extra_vars[split]['references']
    y_gt = np.array(gt_list)

    # Compute Coverage Error
    coverr = sklearn_metrics.coverage_error(y_gt, y_pred)
    # Compute Label Ranking AvgPrec
    avgprec = sklearn_metrics.label_ranking_average_precision_score(
        y_gt, y_pred)
    # Compute Label Ranking Loss
    rankloss = sklearn_metrics.label_ranking_loss(y_gt, y_pred)

    if verbose > 0:
        logging.info('Coverage Error (best: avg labels per sample = %f): %f' %
                     (np.sum(y_gt) / float(n_samples), coverr))
        logging.info('Label Ranking Average Precision (best: 1.0): %f' %
                     avgprec)
        logging.info('Label Ranking Loss (best: 0.0): %f' % rankloss)

    return {
        'coverage error': coverr,
        'average precision': avgprec,
        'ranking loss': rankloss
    }
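
A hypothetical toy illustration of how the prediction matrix is built from word lists above (the word2idx and pred_list values are made up):

import numpy as np

word2idx = {'cat': 0, 'dog': 1, 'bird': 2}
pred_list = [['cat', 'dog'], ['bird']]

y_pred = np.zeros((len(pred_list), len(word2idx)))
for i_s, sample in enumerate(pred_list):
    for word in sample:
        y_pred[i_s, word2idx[word]] = 1
# y_pred -> [[1., 1., 0.],
#            [0., 0., 1.]]
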
def _generate_classification_reports(y_true, y_pred, target_names=None):
    # Calculate additional stats
    total_accuracy = accuracy_score(y_true, y_pred)
    cov_error = coverage_error(y_true, y_pred)
    lrap = label_ranking_average_precision_score(y_true, y_pred)

    report = metrics.multilabel_prediction_report(y_true, y_pred)
    report += '\n\n'
    report += metrics.multilabel_classification_report(
        y_true, y_pred, target_names=target_names)
    report += '\n\n'
    report += 'coverage error:  %.3f' % cov_error
    report += '\n'
    report += 'LRAP:            %.3f' % lrap
    report += '\n'
    report += 'total accuracy:  %.3f' % total_accuracy
    return report
Example #32
def calcu_one_metric(scores, labels, metric, threshold=None):
    ans = None

    if metric == 'mean_average_precision':
        scores, labels = _filter_all_negative(scores, labels)
        ans = metrics.average_precision_score(labels, scores)

    elif metric == 'macro_auc':
        scores, labels = _filter_all_negative(scores, labels)
        ans = metrics.roc_auc_score(labels, scores, average='macro')

    elif metric == 'micro_auc':
        scores, labels = _filter_all_negative(scores, labels)
        ans = metrics.roc_auc_score(labels, scores, average='micro')

    elif metric == 'macro_f1':
        scores, labels = _filter_all_negative(scores, labels)
        pred = pred_from_score(scores, threshold)
        ans = metrics.f1_score(labels, pred, average='macro')

    elif metric == 'micro_f1':
        scores, labels = _filter_all_negative(scores, labels)
        pred = pred_from_score(scores, threshold)
        ans = metrics.f1_score(labels, pred, average='micro')

    elif metric == 'ranking_mean_average_precision':
        ans = metrics.label_ranking_average_precision_score(labels, scores)

    elif metric == 'coverage':
        cove = metrics.coverage_error(labels, scores)
        # see http://scikit-learn.org/stable/modules/model_evaluation.html#coverage-error
        ans = cove - 1

    elif metric == 'ranking_loss':
        ans = metrics.label_ranking_loss(labels, scores)

    elif metric == 'one_error':
        top_score = np.argmax(scores, axis=1)
        top_label = labels[range(len(top_score)), top_score]
        ans = 1 - np.sum(top_label) / len(top_label)

    else:
        raise f"unsuppored metric: {metric}"

    return ans
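
A quick check of the coverage - 1 convention used in the 'coverage' branch above (sklearn's value is one higher than the common literature definition), with made-up values:

import numpy as np
from sklearn import metrics

y_true = np.array([[0, 1, 0]])
scores = np.array([[0.2, 0.9, 0.1]])
# the single true label is ranked first: sklearn returns 1.0, so the adjusted value is 0.0
print(metrics.coverage_error(y_true, scores) - 1)  # 0.0
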
def get_metrics(y_true, y_score, y_binary_score):
    """
    create the metrics object containing all relevant metrics
    """
    metrics = {}
    metrics['total_positive'] = np.sum(np.sum(y_binary_score))
    #TODO remove those two when running on the whole set to avoid excessive storage costs
    #metrics['y_true'] = y_true
    #metrics['y_score'] = y_score
    #metrics['y_binary_score'] = y_binary_score
    metrics['coverage_error'] = coverage_error(y_true, y_score)
    metrics['average_num_of_labels'] = round(float(np.sum(np.sum(y_true, axis=1)))/y_true.shape[0], 2)
    #metrics['average_precision_micro'] = sklearn.metrics.average_precision_score(y_true, y_binary_score, average='micro')
    #metrics['average_precision_macro'] = sklearn.metrics.average_precision_score(y_true, y_binary_score, average='macro')
    metrics['precision_micro'] = sklearn.metrics.precision_score(y_true, y_binary_score, average='micro')
    metrics['precision_macro'] = sklearn.metrics.precision_score(y_true, y_binary_score, average='macro')
    metrics['recall_micro'] = sklearn.metrics.recall_score(y_true, y_binary_score, average='micro')
    metrics['recall_macro'] = sklearn.metrics.recall_score(y_true, y_binary_score, average='macro')
    metrics['f1_micro'] = sklearn.metrics.f1_score(y_true, y_binary_score, average='micro')
    metrics['f1_macro'] = sklearn.metrics.f1_score(y_true, y_binary_score, average='macro')

    # only calculate those for cases with a small number of labels (sections only)
    if y_true.shape[1] < 100:
        precision_scores = np.zeros(y_true.shape[1])
        for i in range(0, y_true.shape[1]):
            precision_scores[i] = sklearn.metrics.precision_score(y_true[:,i], y_binary_score[:,i])
        metrics['precision_scores_array'] = precision_scores.tolist()

        recall_scores = np.zeros(y_true.shape[1])
        for i in range(0, y_true.shape[1]):
            recall_scores[i] = sklearn.metrics.recall_score(y_true[:,i], y_binary_score[:,i])
        metrics['recall_scores_array'] = recall_scores.tolist()

        f1_scores = np.zeros(y_true.shape[1])
        for i in range(0, y_true.shape[1]):
            f1_scores[i] = sklearn.metrics.f1_score(y_true[:,i], y_binary_score[:,i])
        metrics['f1_scores_array'] = f1_scores.tolist()

    metrics['top_1'] = get_top_N_percentage(y_score, y_true, max_N=1)
    metrics['top_3'] = get_top_N_percentage(y_score, y_true, max_N=3)
    metrics['top_5'] = get_top_N_percentage(y_score, y_true, max_N=5)

    return metrics
def evaluate(predictions, labels, threshold=0.4, multi_label=True):
    '''
        True Positive  :  Label : 1, Prediction : 1
        False Positive :  Label : 0, Prediction : 1
        False Negative :  Label : 1, Prediction : 0
        True Negative  :  Label : 0, Prediction : 0
        Precision      :  TP/(TP + FP)
        Recall         :  TP/(TP + FN)
        F Score        :  2.P.R/(P + R)
        Ranking Loss   :  The average number of label pairs that are incorrectly ordered given the predictions
        Hamming Loss   :  The fraction of labels that are incorrectly predicted (the Hamming distance between predictions and labels)
    '''
    assert predictions.shape == labels.shape, "Shapes: %s, %s" % (predictions.shape, labels.shape,)
    metrics = dict()
    if not multi_label:
        metrics['bae'] = BAE(labels, predictions)
        labels, predictions = np.argmax(labels, axis=1), np.argmax(predictions, axis=1)

        metrics['accuracy'] = accuracy_score(labels, predictions)
        metrics['micro_precision'], metrics['micro_recall'], metrics['micro_f1'], _ = \
            precision_recall_fscore_support(labels, predictions, average='micro')
        metrics['macro_precision'], metrics['macro_recall'], metrics['macro_f1'], metrics['coverage'], \
            metrics['average_precision'], metrics['ranking_loss'], metrics['pak'], metrics['hamming_loss'] \
            = 0, 0, 0, 0, 0, 0, 0, 0

    else:
        metrics['coverage'] = coverage_error(labels, predictions)
        metrics['average_precision'] = label_ranking_average_precision_score(labels, predictions)
        metrics['ranking_loss'] = label_ranking_loss(labels, predictions)
        
        for i in range(predictions.shape[0]):
            predictions[i, :][predictions[i, :] >= threshold] = 1
            predictions[i, :][predictions[i, :] < threshold] = 0

        metrics['bae'] = 0
        metrics['patk'] = patk(predictions, labels)
        metrics['micro_precision'], metrics['micro_recall'], metrics['micro_f1'], metrics['macro_precision'], \
            metrics['macro_recall'], metrics['macro_f1'] = bipartition_scores(labels, predictions)
    return metrics
def multilabel_metrics(pred_list, verbose, extra_vars, split):
    """
    Multilabel ranking metrics. See the multilabel ranking metrics in the sklearn library for more info:
        http://scikit-learn.org/stable/modules/model_evaluation.html#multilabel-ranking-metrics

    :param pred_list: list of predicted label words for each sample
    :param verbose: if greater than 0 the metric measures are printed out
    :param extra_vars: extra variables
                        extra_vars['word2idx'] - dictionary mapping from words to indices
                        extra_vars['references'] - list of GT labels
    :param split: split on which we are evaluating
    :return: Dictionary of multilabel metrics
    """
    from sklearn import metrics as sklearn_metrics

    word2idx = extra_vars[split]['word2idx']

    # check if an additional dictionary matching raw to basic and general labels is provided
    # in that case a more general evaluation will be considered
    raw2basic = extra_vars[split].get('raw2basic', None)
    if raw2basic is not None:
        logging.info('Applying general evaluation with raw2basic dictionary.')

    if raw2basic is None:
        n_classes = len(word2idx)
    else:
        basic_values = set(raw2basic.values())
        n_classes = len(basic_values)
    n_samples = len(pred_list)

    # Create prediction matrix
    y_pred = np.zeros((n_samples, n_classes))
    for i_s, sample in list(enumerate(pred_list)):
        for word in sample:
            if raw2basic is None:
                y_pred[i_s, word2idx[word]] = 1
            else:
                word = word.strip()
                y_pred[i_s, raw2basic[word]] = 1

    # Prepare GT
    gt_list = extra_vars[split]['references']

    if raw2basic is None:
        y_gt = np.array(gt_list)
    else:
        idx2word = {v: k for k, v in iteritems(word2idx)}
        y_gt = np.zeros((n_samples, n_classes))
        for i_s, sample in list(enumerate(gt_list)):
            for raw_idx, is_active in list(enumerate(sample)):
                if is_active:
                    word = idx2word[raw_idx].strip()
                    y_gt[i_s, raw2basic[word]] = 1

    # Compute Coverage Error
    coverr = sklearn_metrics.coverage_error(y_gt, y_pred)
    # Compute Label Ranking AvgPrec
    avgprec = sklearn_metrics.label_ranking_average_precision_score(y_gt, y_pred)
    # Compute Label Ranking Loss
    rankloss = sklearn_metrics.label_ranking_loss(y_gt, y_pred)
    # Compute Precision, Recall and F1 score
    precision, recall, f1, _ = sklearn_metrics.precision_recall_fscore_support(y_gt, y_pred, average='micro')

    if verbose > 0:
        logging.info(
            '"coverage_error" (best: avg labels per sample = %f): %f' % (float(np.sum(y_gt)) / float(n_samples), coverr))
        logging.info('Label Ranking "average_precision" (best: 1.0): %f' % avgprec)
        logging.info('Label "ranking_loss" (best: 0.0): %f' % rankloss)
        logging.info('precision: %f' % precision)
        logging.info('recall: %f' % recall)
        logging.info('f1: %f' % f1)

    return {'coverage_error': coverr,
            'average_precision': avgprec,
            'ranking_loss': rankloss,
            'precision': precision,
            'recall': recall,
            'f1': f1}
Example #36
  im.close()

xTrain /= 255
#xTrain = xTrain.reshape(xTrain.shape[0], 1, 40, 30).astype('float32')
#print(xTrain.shape)

#xTrain /= xTrain.std(axis = None)
#xTrain -= xTrain.mean()

y = np.array([int(x[-1:]) for x in trainingLabels['classname']]).astype('int32')
#y = to_categorical(y, 10)
print(y.shape)

x_fit, x_eval, y_fit, y_eval = cross_validation.train_test_split(xTrain, y, test_size=0.2)

clf = xgb.XGBClassifier(objective='multi:softmax', n_estimators=200, learning_rate=0.05, max_depth=20, nthread=4, subsample=0.7, colsample_bytree=0.85, seed=2471)

clf.fit(x_fit, y_fit, early_stopping_rounds=20, eval_metric='mlogloss', eval_set=[(x_eval, y_eval)])

clf.fit(xTrain, y)
predictY = clf.predict_proba(xTrain)

from sklearn import metrics

y = to_categorical(y, 10)
print(metrics.coverage_error(y, predictY))

with open(pickleFile,'wb') as f:
  sys.setrecursionlimit(20000)
  pickle.dump(clf, f)
Example #37
def test_coverage_error():
    # Toy case
    assert_almost_equal(coverage_error([[0, 1]], [[0.25, 0.75]]), 1)
    assert_almost_equal(coverage_error([[0, 1]], [[0.75, 0.25]]), 2)
    assert_almost_equal(coverage_error([[1, 1]], [[0.75, 0.25]]), 2)
    assert_almost_equal(coverage_error([[0, 0]], [[0.75, 0.25]]), 0)

    assert_almost_equal(coverage_error([[0, 0, 0]], [[0.25, 0.5, 0.75]]), 0)
    assert_almost_equal(coverage_error([[0, 0, 1]], [[0.25, 0.5, 0.75]]), 1)
    assert_almost_equal(coverage_error([[0, 1, 0]], [[0.25, 0.5, 0.75]]), 2)
    assert_almost_equal(coverage_error([[0, 1, 1]], [[0.25, 0.5, 0.75]]), 2)
    assert_almost_equal(coverage_error([[1, 0, 0]], [[0.25, 0.5, 0.75]]), 3)
    assert_almost_equal(coverage_error([[1, 0, 1]], [[0.25, 0.5, 0.75]]), 3)
    assert_almost_equal(coverage_error([[1, 1, 0]], [[0.25, 0.5, 0.75]]), 3)
    assert_almost_equal(coverage_error([[1, 1, 1]], [[0.25, 0.5, 0.75]]), 3)

    assert_almost_equal(coverage_error([[0, 0, 0]], [[0.75, 0.5, 0.25]]), 0)
    assert_almost_equal(coverage_error([[0, 0, 1]], [[0.75, 0.5, 0.25]]), 3)
    assert_almost_equal(coverage_error([[0, 1, 0]], [[0.75, 0.5, 0.25]]), 2)
    assert_almost_equal(coverage_error([[0, 1, 1]], [[0.75, 0.5, 0.25]]), 3)
    assert_almost_equal(coverage_error([[1, 0, 0]], [[0.75, 0.5, 0.25]]), 1)
    assert_almost_equal(coverage_error([[1, 0, 1]], [[0.75, 0.5, 0.25]]), 3)
    assert_almost_equal(coverage_error([[1, 1, 0]], [[0.75, 0.5, 0.25]]), 2)
    assert_almost_equal(coverage_error([[1, 1, 1]], [[0.75, 0.5, 0.25]]), 3)

    assert_almost_equal(coverage_error([[0, 0, 0]], [[0.5, 0.75, 0.25]]), 0)
    assert_almost_equal(coverage_error([[0, 0, 1]], [[0.5, 0.75, 0.25]]), 3)
    assert_almost_equal(coverage_error([[0, 1, 0]], [[0.5, 0.75, 0.25]]), 1)
    assert_almost_equal(coverage_error([[0, 1, 1]], [[0.5, 0.75, 0.25]]), 3)
    assert_almost_equal(coverage_error([[1, 0, 0]], [[0.5, 0.75, 0.25]]), 2)
    assert_almost_equal(coverage_error([[1, 0, 1]], [[0.5, 0.75, 0.25]]), 3)
    assert_almost_equal(coverage_error([[1, 1, 0]], [[0.5, 0.75, 0.25]]), 2)
    assert_almost_equal(coverage_error([[1, 1, 1]], [[0.5, 0.75, 0.25]]), 3)

    # Non-trivial case
    assert_almost_equal(coverage_error([[0, 1, 0], [1, 1, 0]],
                                       [[0.1, 10., -3], [0, 1, 3]]),
                        (1 + 3) / 2.)

    assert_almost_equal(coverage_error([[0, 1, 0], [1, 1, 0], [0, 1, 1]],
                                       [[0.1, 10, -3], [0, 1, 3], [0, 2, 0]]),
                        (1 + 3 + 3) / 3.)

    assert_almost_equal(coverage_error([[0, 1, 0], [1, 1, 0], [0, 1, 1]],
                                       [[0.1, 10, -3], [3, 1, 3], [0, 2, 0]]),
                        (1 + 3 + 3) / 3.)
Example #38
def test_coverage_tie_handling():
    assert_almost_equal(coverage_error([[0, 0]], [[0.5, 0.5]]), 0)
    assert_almost_equal(coverage_error([[1, 0]], [[0.5, 0.5]]), 2)
    assert_almost_equal(coverage_error([[0, 1]], [[0.5, 0.5]]), 2)
    assert_almost_equal(coverage_error([[1, 1]], [[0.5, 0.5]]), 2)

    assert_almost_equal(coverage_error([[0, 0, 0]], [[0.25, 0.5, 0.5]]), 0)
    assert_almost_equal(coverage_error([[0, 0, 1]], [[0.25, 0.5, 0.5]]), 2)
    assert_almost_equal(coverage_error([[0, 1, 0]], [[0.25, 0.5, 0.5]]), 2)
    assert_almost_equal(coverage_error([[0, 1, 1]], [[0.25, 0.5, 0.5]]), 2)
    assert_almost_equal(coverage_error([[1, 0, 0]], [[0.25, 0.5, 0.5]]), 3)
    assert_almost_equal(coverage_error([[1, 0, 1]], [[0.25, 0.5, 0.5]]), 3)
    assert_almost_equal(coverage_error([[1, 1, 0]], [[0.25, 0.5, 0.5]]), 3)
    assert_almost_equal(coverage_error([[1, 1, 1]], [[0.25, 0.5, 0.5]]), 3)
Example #39
 def coverage_error(self):
     self.coverage_error = metrics.coverage_error(self.ground_truth, self.predictions_raw)
     avg_true_labels = np.count_nonzero(self.ground_truth) / self.ntrials
     return 'Coverage Error [' + str(avg_true_labels) + ', ~): ' + str(self.coverage_error)