Example #1
def label_rank_loss(arr, arr1, arr2, arr3):

    a3 = label_ranking_loss(arr, arr1)
    b3 = label_ranking_loss(arr, arr2)
    c3 = label_ranking_loss(arr, arr3)

    print("Ranking Loss Scores for the three classifiers are")
    print("Using Binary Relevance: " + str(a3))
    print("Using Classifier Chain: " + str(b3))
    print("Using LabelPowerSet: " + str(c3))
    print("\n")
Example #2
def use_sklearn_ml_knn():
    """

    :return:
    """

    base_path = os.getcwd()
    # train_x = np.load(os.path.join(base_path, 'dataset/train_x.npy'), allow_pickle=True)
    # train_y = np.load(os.path.join(base_path, 'dataset/train_y.npy'), allow_pickle=True)

    train_x = np.load(os.path.join(base_path, 'my_dataset/train_x.npy'),
                      allow_pickle=True)
    train_y = np.load(os.path.join(base_path, 'my_dataset/train_y.npy'),
                      allow_pickle=True)

    new_train_y = []
    for tup in train_y:
        tmp = []
        for label in tup:
            if label == 0:
                tmp.append(0)
            else:
                tmp.append(1)
        new_train_y.append(tmp)

    # test_x = np.load('dataset/test_x.npy', allow_pickle=True)
    # test_y = np.load('dataset/test_y.npy', allow_pickle=True)

    test_x = np.load('my_dataset/test_x.npy', allow_pickle=True)
    test_y = np.load('my_dataset/test_y.npy', allow_pickle=True)
    new_test_y = []
    for tup in test_y:
        tmp = []
        for label in tup:
            if label == 0:
                tmp.append(0)
            else:
                tmp.append(1)
        new_test_y.append(tmp)

    new_test_y = np.array(new_test_y)

    classifier = MLkNN2(train_x, np.array(new_train_y), k=10)

    # classifier.fit(train_x, np.array(new_train_y))
    classifier.fit()
    predictions = classifier.predict(test_x)
    predictions = convert_prediction(predictions)

    # hamming_loss = HammingLoss(new_test_y, predictions)
    h_loss = hamming_loss(new_test_y, predictions)
    z = zero_one_loss(new_test_y, predictions)
    c = coverage_error(new_test_y, predictions)
    r = label_ranking_loss(new_test_y, predictions)
    a = average_precision_score(new_test_y, predictions)
    print('hamming_loss = ', h_loss)
    print('0-1_loss = ', z)
    print('cover_loss = ', c)
    print('rank_loss = ', r)
    print('average_precision = ', a)
Example #3
def evaluation(y_pred, y_prob, y_true):
    coverage = coverage_error(y_true, y_prob)
    hamming = hamming_loss(y_true, y_pred)
    ranking_loss = label_ranking_loss(y_true, y_prob)

    f1_macro = metrics.f1_score(y_true, y_pred, average='macro')
    f1_micro = metrics.f1_score(y_true, y_pred, average='micro')

    # per-sample Jaccard index, averaged over samples
    # (jaccard_similarity_score is the pre-0.23 scikit-learn API; jaccard_score replaces it)
    acc = 0
    for i in range(y_true.shape[0]):
        acc += jaccard_similarity_score(
            y_true.iloc[i, :], y_pred.iloc[i, :])
    acc = acc / y_true.shape[0]

    zero_one = zero_one_loss(y_true, y_pred)  # 0-1 error

    performance = {
        "coverage_error": coverage,
        "ranking_loss": ranking_loss,
        "hamming_loss": hamming,
        "f1_macro": f1_macro,
        "f1_micro": f1_micro,
        "Jaccard_Index": acc,
        "zero_one_error": zero_one
    }
    return performance
Example #4
    def cross_validation(self, features):
        '''
        Standalone validation of an untrained classifier.
        Splits the features into a training set and a validation set.
        Warning: overwrites any existing trained model.
        '''
        values, classes, categories = self._features_to_values(features)
        values = np.nan_to_num(values)
        n_classes = len(categories)

        (training_values, test_values, training_classes,
         test_classes) = train_test_split(values,
                                          classes,
                                          test_size=self.validation_split,
                                          random_state=self.seed)

        self._train(np.array(training_values), training_classes, n_classes)
        predictions = self._predict(np.array(test_values))
        predicted_classes = np_utils.probas_to_classes(predictions)
        binary_labels = np_utils.to_categorical(test_classes)

        # compute the metrics
        accuracy = accuracy_score(test_classes, predicted_classes)
        precision_score = average_precision_score(binary_labels, predictions)
        error = coverage_error(binary_labels, predictions)
        loss = label_ranking_loss(binary_labels, predictions)
        label_precision = label_ranking_average_precision_score(
            binary_labels, predictions)

        real_cat = categories[test_classes]
        predicted_cat = categories[predicted_classes]
        return (real_cat, predicted_cat, accuracy, precision_score, error,
                loss, label_precision)
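
The same ranking metrics can be checked outside the class with a plain scikit-learn pipeline; a rough sketch under assumed inputs (LogisticRegression and the synthetic data are illustrative stand-ins, not the project's model):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.metrics import (accuracy_score, average_precision_score,
                             coverage_error, label_ranking_loss,
                             label_ranking_average_precision_score)

X, y = make_classification(n_samples=300, n_classes=3, n_informative=6,
                           random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=0)

clf = LogisticRegression(max_iter=1000).fit(X_tr, y_tr)
probs = clf.predict_proba(X_te)                    # (n_samples, n_classes) scores
binary_labels = label_binarize(y_te, classes=[0, 1, 2])

print("accuracy        ", accuracy_score(y_te, probs.argmax(axis=1)))
print("avg precision   ", average_precision_score(binary_labels, probs))
print("coverage error  ", coverage_error(binary_labels, probs))
print("ranking loss    ", label_ranking_loss(binary_labels, probs))
print("ranking avg prec", label_ranking_average_precision_score(binary_labels, probs))
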
Example #5
def metric(pred_prob, label, inclusion_index_set, threshold=0.5):
    # label, pred_prob structure: [n_classes, n_samples]
    included_pred_prob = list()
    included_label = list()
    for index in inclusion_index_set:
        included_pred_prob.append(pred_prob[index])
        included_label.append(label[index])
    prob = np.array(included_pred_prob).transpose()
    pred = np.array(included_pred_prob).transpose() > threshold
    true = np.array(included_label).transpose()

    micro_auc = roc_auc_score(true, prob, average='micro')
    macro_auc = roc_auc_score(true, prob, average='macro')
    micro_f1 = f1_score(true, pred, average='micro')
    macro_f1 = f1_score(true, pred, average='macro')
    micro_avg_precision = average_precision_score(true, prob, average='micro')
    macro_avg_precision = average_precision_score(true, prob, average='macro')
    coverage = coverage_error(true, prob)
    ranking_loss = label_ranking_loss(true, prob)
    hamming = hamming_loss(true, pred)
    fuse = np.concatenate([prob[:, :, np.newaxis], true[:, :, np.newaxis]], axis=2).transpose([1, 0, 2])
    top_1_num = top_k_num(fuse, 1)
    top_3_num = top_k_num(fuse, 3)
    top_5_num = top_k_num(fuse, 5)
    top_10_num = top_k_num(fuse, 10)
    top_20_num = top_k_num(fuse, 20)
    top_30_num = top_k_num(fuse, 30)
    top_40_num = top_k_num(fuse, 40)
    top_50_num = top_k_num(fuse, 50)

    return macro_auc, micro_auc, micro_f1, macro_f1, micro_avg_precision, macro_avg_precision, coverage, ranking_loss, \
        hamming, top_1_num, top_3_num, top_5_num, top_10_num, top_20_num, top_30_num, top_40_num, top_50_num
Example #6
def powerset(X_train, X_test, y_train, y_test, classifier):

    print("Label Powerset")
    model = chooseClassifier(classifier, X_train, y_train)
    y_pred = model.predict(X_test)

    hamming = hamming_loss(y_test, y_pred)
    subset_accuracy = accuracy_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred, average='micro')
    precision = precision_score(y_test, y_pred, average='micro')
    f1 = f1_score(y_test, y_pred, average='micro')
    coverage = coverage_error(y_test, y_pred.toarray())
    aps = label_ranking_average_precision_score(y_test, y_pred.toarray())
    rankingloss = label_ranking_loss(y_test, y_pred.toarray())
    print("Hamming: " + str(hamming))
    print("Subset Accuracy: " + str(subset_accuracy))
    print("Recall: " + str(recall))
    print("Precision: " + str(precision))
    print("F1: " + str(f1))
    print("Coverage error: " + str(coverage))
    print("Average Precision Score: " + str(aps))
    print("Ranking Loss: " + str(rankingloss))
    print("\n")

    return hamming, subset_accuracy, recall, precision, f1, coverage, aps, rankingloss
Example #7
def no_motion_baseline_metrics(original_dataset_cartesian):
    traces_train, traces_test = get_traces_for_train_and_test()
    accuracy_results = []
    f1_score_results = []
    ranking_results = []
    for trace_num, trace in enumerate(traces_test):
        user = trace['user']
        video = trace['video']
        repl_tiles_map = read_replica_tile_info(video, user)
        for t in range(M_WINDOW,
                       len(original_dataset_cartesian[user][video]) -
                       H_WINDOW):
            print('computing no_motion metrics for trace', trace_num, '/',
                  len(traces_test), 'time-stamp:', t)
            past_positions = original_dataset_cartesian[user][video][
                t - M_WINDOW:t + 1]
            # pred_tile_map = from_position_to_tile_probability_cartesian(past_positions[-1])
            pred_tile_map = repl_tiles_map[t]
            future_tile_maps = repl_tiles_map[t + 1:t + H_WINDOW + 1]
            for x_i, tile_map in enumerate(future_tile_maps):
                accuracy_results.append(
                    accuracy_score(np.ndarray.flatten(tile_map),
                                   np.ndarray.flatten(pred_tile_map)))
                f1_score_results.append(
                    f1_score(np.ndarray.flatten(tile_map),
                             np.ndarray.flatten(pred_tile_map)))
                ranking_results.append(
                    label_ranking_loss(tile_map, pred_tile_map))
    return np.mean(accuracy_results) * 100, np.mean(f1_score_results), np.mean(
        ranking_results)
Example #8
    def update_from_numpy(self, preds, labels):
        for pred, label, cls in zip(zip(*preds), zip(*labels), self.confusion):
            true_pos = np.sum([p and l for p, l in zip(pred, label)])
            true_neg = np.sum([not p and not l for p, l in zip(pred, label)])
            false_pos = np.sum([p and not l for p, l in zip(pred, label)])
            false_neg = np.sum([not p and l for p, l in zip(pred, label)])

            self.num_true_positives += true_pos
            self.num_true_negatives += true_neg
            self.num_false_positives += false_pos
            self.num_false_negatives += false_neg

            cls["true_pos"] += true_pos
            cls["true_neg"] += true_neg
            cls["false_pos"] += false_pos
            cls["false_neg"] += false_neg
            cls["support"] += true_pos + false_neg

        n = len(preds)

        self.n += n
        self.ranking_loss += label_ranking_loss(labels, preds) * n
        self.coverage += coverage_error(labels, preds) * n
        self.average_precision += label_ranking_average_precision_score(
            labels, preds) * n

        for pred, label in zip(preds, labels):
            # one-error: count samples whose top-scored label is not among the true labels
            top_prediction = np.argsort(pred)[-1]
            label = np.argwhere(label)

            if top_prediction not in label:
                self.one_error += 1
Example #9
    def on_epoch_end(self, epoch, logs={}):
        result = self.model.predict(self.x_test)
        roc_auc = metrics.roc_auc_score(self.y_test.ravel(), result.ravel())
        print('\r Micro val_roc_auc: %s' % (str(round(roc_auc, 4))),
              end=100 * ' ' + '\n')

        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        for i in range(14):
            fpr[i], tpr[i], _ = roc_curve(self.y_test[:, i], result[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])
            print("Class " + str(i) + "auc = " + str(roc_auc[i]))

        macro = sum(roc_auc.values()) / 14
        print('\r Macro val_roc_auc: %s' % (str(round(macro, 4))),
              end=100 * ' ' + '\n')
        value = coverage_error(self.y_test, result)
        print('\r coverage_error: %s' % (str(round(value, 4))),
              end=100 * ' ' + '\n')

        value = label_ranking_loss(self.y_test, result)
        print('\r label_ranking_loss: %s' % (str(round(value, 4))),
              end=100 * ' ' + '\n')

        roc_auc = label_ranking_average_precision_score(self.y_test, result)
        print('\r label_ranking_average_precision_score: %s' %
              (str(round(roc_auc, 4))),
              end=100 * ' ' + '\n')

        return
Example #10
def binary(X_train, X_test, y_train, y_test):

    print("Binary Relevance")
    model = BinaryRelevance(classifier=SVC(),
                            require_dense=[True, True]).fit(X_train, y_train)
    y_pred = model.predict(X_test)

    hamming = hamming_loss(y_test, y_pred)
    subset_accuracy = accuracy_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred, average='micro')
    precision = precision_score(y_test, y_pred, average='micro')
    f1 = f1_score(y_test, y_pred, average='micro')
    coverage = coverage_error(y_test, y_pred.toarray())
    aps = label_ranking_average_precision_score(y_test, y_pred.toarray())
    rankingloss = label_ranking_loss(y_test, y_pred.toarray())
    print("Hamming: " + str(hamming))
    print("Subset Accuracy: " + str(subset_accuracy))
    print("Recall: " + str(recall))
    print("Precision: " + str(precision))
    print("F1: " + str(f1))
    print("Coverage error: " + str(coverage))
    print("Average Precision Score: " + str(aps))
    print("Ranking Loss: " + str(rankingloss))
    print("\n")

    return hamming, subset_accuracy, recall, precision, f1, coverage, aps, rankingloss
Example #11
    def on_epoch_end(self, epoch, logs={}):
        result = self.model.predict_generator(self.val_gen,
                                            steps=self.val_gen.n / BATCH,
                                            verbose=1)

        print(self.y[0])
        print(result[0])
        roc_auc = metrics.roc_auc_score(self.y.ravel(), result.ravel())
        print('\r Micro val_roc_auc: %s' % (str(round(roc_auc,4))), end=100*' '+'\n')

        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        for i in range(14):
            fpr[i], tpr[i], _ = roc_curve(self.y[:, i], result[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])
            print("Class " + str(i) + "auc = " + str(roc_auc[i]))


        value = coverage_error(self.y, result)
        print('\r coverage_error: %s' % (str(round(value,4))), end=100*' '+'\n')

        value = label_ranking_loss(self.y, result)
        print('\r label_ranking_loss: %s' % (str(round(value, 4))), end=100 * ' ' + '\n')

        roc_auc = label_ranking_average_precision_score(self.y, result)
        print('\r label_ranking_average_precision_score: %s' % (str(round(roc_auc,4))), end=100*' '+'\n')

        return
Example #12
def evaluate(predictions, labels, threshold=0.5):
    '''
        True Positive  :  Label : 1, Prediction : 1
        False Positive :  Label : 0, Prediction : 1
        False Negative :  Label : 1, Prediction : 0
        True Negative  :  Label : 0, Prediction : 0
        Precision      :  TP/(TP + FP)
        Recall         :  TP/(TP + FN)
        F Score        :  2.P.R/(P + R)
        Ranking Loss   :  The average number of label pairs that are incorrectly ordered, given the prediction scores
        Hamming Loss   :  The fraction of labels that are incorrectly predicted (Hamming distance between predictions and labels)
    '''
    assert predictions.shape == labels.shape, "Shapes: %s, %s" % (
        predictions.shape,
        labels.shape,
    )
    metrics = dict()
    # print('pre', predictions)
    # print('label', labels)
    metrics['coverage'] = coverage_error(labels, predictions)
    metrics['average_precision'] = label_ranking_average_precision_score(
        labels, predictions)
    metrics['ranking_loss'] = label_ranking_loss(labels, predictions)

    for i in range(predictions.shape[0]):
        predictions[i, :][predictions[i, :] >= threshold] = 1
        predictions[i, :][predictions[i, :] < threshold] = 0

    metrics['bae'] = 0
    metrics['patk'] = patk(predictions, labels)
    metrics['hamming_loss'] = hamming_loss(y_pred=predictions, y_true=labels)
    metrics['micro_precision'], metrics['micro_recall'], metrics['micro_f1'], metrics['macro_precision'], \
        metrics['macro_recall'], metrics['macro_f1'] = bipartition_scores(labels, predictions)

    return metrics
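
A tiny hand-checked illustration of the two definitions from the docstring above (the numbers are worked out manually; only scikit-learn is assumed):

import numpy as np
from sklearn.metrics import hamming_loss, label_ranking_loss

labels = np.array([[1, 0, 1, 0]])            # labels 0 and 2 are relevant
scores = np.array([[0.9, 0.8, 0.1, 0.2]])    # label 2 is scored below both irrelevant labels

# Relevant/irrelevant pairs: (0,1), (0,3), (2,1), (2,3); the two pairs involving
# label 2 are mis-ordered, so the ranking loss is 2/4.
print(label_ranking_loss(labels, scores))    # 0.5

preds = (scores >= 0.5).astype(int)          # -> [[1, 1, 0, 0]]
print(hamming_loss(labels, preds))           # 2 wrong labels out of 4 -> 0.5
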
Example #13
 def evaluateFold(self, clf, mask):
     probs = clf.predict_proba(
         self.x[~mask])[:,
                        1]  #get probability of test ligands being positive
     ranking_loss = label_ranking_loss(
         self.y[~mask][:, self.targetIndex].reshape(1, -1),
         probs.reshape(1, -1))
     return ranking_loss
Example #14
def test_ranking_loss_ties_handling():
    # Tie handling
    assert_almost_equal(label_ranking_loss([[1, 0]], [[0.5, 0.5]]), 1)
    assert_almost_equal(label_ranking_loss([[0, 1]], [[0.5, 0.5]]), 1)
    assert_almost_equal(label_ranking_loss([[0, 0, 1]], [[0.25, 0.5, 0.5]]), 1 / 2)
    assert_almost_equal(label_ranking_loss([[0, 1, 0]], [[0.25, 0.5, 0.5]]), 1 / 2)
    assert_almost_equal(label_ranking_loss([[0, 1, 1]], [[0.25, 0.5, 0.5]]), 0)
    assert_almost_equal(label_ranking_loss([[1, 0, 0]], [[0.25, 0.5, 0.5]]), 1)
    assert_almost_equal(label_ranking_loss([[1, 0, 1]], [[0.25, 0.5, 0.5]]), 1)
    assert_almost_equal(label_ranking_loss([[1, 1, 0]], [[0.25, 0.5, 0.5]]), 1)
Example #15
def evaluate_ouput(y_test, output):
    metrics = dict()
    metrics['coverage'] = coverage_error(y_test, output)
    metrics['average_precision'] = label_ranking_average_precision_score(
        y_test, output)
    metrics['ranking_loss'] = label_ranking_loss(y_test, output)
    metrics['one_error'] = OneError(output, y_test)

    return metrics
Example #16
 def _costFunction(self, y_ni, t_ni):
     res = 0.0
     if self._loss == 'hamming':
         res = hamming_loss(y_ni, t_ni)
     elif self._loss == 'rank':
         res = label_ranking_loss(y_ni, t_ni)
     elif self._loss == 'f1':
         res = 1 - f1_score(y_ni, t_ni, average='binary')
     return res
Example #17
def compute_evaluation(true_matrix, predict_matrix):
    h = hamming_loss(true_matrix, predict_matrix)
    z = zero_one_loss(true_matrix, predict_matrix)
    c = coverage_error(true_matrix, predict_matrix) - 1
    r = label_ranking_loss(true_matrix, predict_matrix)
    a = average_precision_score(true_matrix, predict_matrix)

    result = [h, z, c, r, a]
    return result
Example #18
    def evaluate(self, evaluation_metric='auc'):
        """
        Prints evaluation score

        :param evaluation_metric: string name of the evaluation metric specified in EVALUATION_METRIC_VALUES
        """

        if evaluation_metric not in EVALUATION_METRIC_VALUES:
            print('Error: wrong evaluation metric')
            return

        predictions = self.predictions
        mask = self.mask
        true_values = self.true_values

        ratings_true = []
        ratings_predicted = []

        for i in range(predictions.shape[0]):
            for j in range(predictions.shape[1]):
                if mask[i][j]:
                    ratings_true.append(true_values[i][j])
                    ratings_predicted.append(predictions[i][j])

        ratings_true = np.asarray(ratings_true)
        ratings_predicted = np.asarray(ratings_predicted)

        # Rmse
        if evaluation_metric == 'rmse':
            score = rmse(ratings_true, ratings_predicted)
            print('\nrmse: ' + str(score))

        # Auc
        if evaluation_metric == 'auc':
            score = roc_auc_score(ratings_true, ratings_predicted)
            print('\nauc: ' + str(score))

        # Label ranking loss
        if evaluation_metric == 'lrl':
            max_rating = max(ratings_predicted)
            min_rating = min(ratings_predicted)

            normalized_ratings = []
            for r in ratings_predicted:
                new_rating = (r - min_rating) / (max_rating - min_rating)
                normalized_ratings.append(new_rating)

            ratings_predicted = np.zeros((len(ratings_predicted), 3))
            for index, r in enumerate(normalized_ratings):
                ratings_predicted[index, 0] = 1 - r
                ratings_predicted[index, 1] = 1 - ratings_predicted[index, 0]
                ratings_predicted[index, 2] = 0

            #ratings_predicted = label_binarize(ratings_predicted, classes=[1, 2])
            ratings_true = label_binarize(ratings_true, classes=[1, 2, 3])
            score = label_ranking_loss(ratings_true, ratings_predicted)
            print('\nlabel ranking loss: ' + str(score))
Example #19
def test_ranking_loss_ties_handling():
    # Tie handling
    assert_almost_equal(label_ranking_loss([[1, 0]], [[0.5, 0.5]]), 1)
    assert_almost_equal(label_ranking_loss([[0, 1]], [[0.5, 0.5]]), 1)
    assert_almost_equal(label_ranking_loss([[0, 0, 1]], [[0.25, 0.5, 0.5]]),
                        1 / 2)
    assert_almost_equal(label_ranking_loss([[0, 1, 0]], [[0.25, 0.5, 0.5]]),
                        1 / 2)
    assert_almost_equal(label_ranking_loss([[0, 1, 1]], [[0.25, 0.5, 0.5]]), 0)
    assert_almost_equal(label_ranking_loss([[1, 0, 0]], [[0.25, 0.5, 0.5]]), 1)
    assert_almost_equal(label_ranking_loss([[1, 0, 1]], [[0.25, 0.5, 0.5]]), 1)
    assert_almost_equal(label_ranking_loss([[1, 1, 0]], [[0.25, 0.5, 0.5]]), 1)
Example #20
 def ranking_loss(self):
     """
     Computes the ranking loss, which averages the number of
     incorrectly-ordered label pairs (i.e. true labels that have a lower
     score than false labels, weighted by the inverse number
     of false and true labels) based on raw prediction scores.
     """
     self.ranking_loss = metrics.label_ranking_loss(
         self.ground_truth, self.predictions_raw)
     return self.ranking_loss
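
For intuition, the docstring's description corresponds to the pairwise computation sketched below; it is a simplified re-implementation (not sklearn's actual code), but it agrees with sklearn on the tie cases in test_ranking_loss_ties_handling above:

import numpy as np
from sklearn.metrics import label_ranking_loss

def ranking_loss_by_hand(Y, S):
    """Average fraction of (relevant, irrelevant) label pairs that are mis-ordered."""
    losses = []
    for y, s in zip(np.asarray(Y), np.asarray(S)):
        pos, neg = np.flatnonzero(y == 1), np.flatnonzero(y == 0)
        if len(pos) == 0 or len(neg) == 0:
            losses.append(0.0)        # degenerate sample: contributes zero, as in sklearn
            continue
        bad = sum(s[i] <= s[j] for i in pos for j in neg)   # ties count as mis-ordered
        losses.append(bad / (len(pos) * len(neg)))
    return float(np.mean(losses))

Y = [[0, 1, 0], [1, 1, 0]]
S = [[0.1, 10.0, -3.0], [0.0, 1.0, 3.0]]
print(ranking_loss_by_hand(Y, S), label_ranking_loss(Y, S))   # both 0.5
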
Example #21
def evalulate(y_true, y_prob):
    # the following deal with {0,1} and {-1,1} ambiguities
    # but may slow down the process (?? not sure)
    # if -1 in y_true:
    # y_true = (y_true + 1) / 2.0
    y_true = (y_true + 1) / 2.0
    auc = roc_auc_score(y_true, y_prob)
    ap = label_ranking_average_precision_score([y_true], [y_prob])
    rl = label_ranking_loss([y_true], [y_prob])
    return auc, ap, rl
Example #22
 def ranking_loss(self):
     """
     Computes the ranking loss, which averages the number of
     incorrectly-ordered label pairs (i.e. true labels that have a lower
     score than false labels, weighted by the inverse number
     of false and true labels) based on raw prediction scores.
     """
     self.ranking_loss = metrics.label_ranking_loss(self.ground_truth,
                                                    self.predictions_raw)
     return self.ranking_loss
Example #23
def get_score(y_true, y_pred, labels=None):
    scores = {}
    scores["lrap"] = label_ranking_average_precision_score(y_true, y_pred)
    scores["lrloss"] = label_ranking_loss(y_true, y_pred)
    scores["ndcg_score"] = ndcg_score(y_true, y_pred)
    scores["coverage_error"] = coverage_error(y_true, y_pred)
    try:
        scores["hamming_loss"] = hamming_loss(y_true, y_pred)
    except:
        scores["hamming_loss"] = None
    try:
        scores["subset_accuracy"] = accuracy_score(y_true, y_pred)
    except:
        scores["subset_accuracy"] = None

    for avg in [None, "micro", "macro", "weighted", "samples"]:
        if avg:
            avg_suffix = f"_{avg}"
            try:
                (
                    scores[f"precision{avg_suffix}"],
                    scores[f"recall{avg_suffix}"],
                    scores[f"f1{avg_suffix}"],
                    _,
                ) = precision_recall_fscore_support(y_true, y_pred, average=avg)
            except:
                (
                    scores[f"precision{avg_suffix}"],
                    scores[f"recall{avg_suffix}"],
                    scores[f"f1{avg_suffix}"],
                ) = (None, None, None)
            try:
                scores[f"roc_auc{avg_suffix}"] = roc_auc_score(
                    y_true, y_pred, average=avg
                )
            except:
                scores[f"roc_auc{avg_suffix}"] = None
        else:
            try:
                p, r, f, _ = precision_recall_fscore_support(y_true, y_pred)
                scores[f"precision"], scores[f"recall"], scores[f"f1"] = (
                    dict(zip(labels, list(sc))) for sc in (p, r, f)
                )
            except:
                scores[f"precision"], scores[f"recall"], scores[f"f1"] = (
                    None,
                    None,
                    None,
                )
            try:
                scores["roc_auc"] = roc_auc_score(y_true, y_pred)
            except:
                scores["roc_auc"] = None

    return scores
Example #24
def print_predict(ground_truth, prediction, hyper_params):
    rounded = 4
    AUC_macro = round(roc_auc_score(ground_truth, prediction, average='macro'),
                      rounded)
    AUC_micro = round(roc_auc_score(ground_truth, prediction, average='micro'),
                      rounded)
    Coverage_error = round(
        (coverage_error(ground_truth, prediction)) / ground_truth.shape[1],
        rounded)
    rankloss = round(label_ranking_loss(ground_truth, prediction), rounded)
    One_error = round(one_error(ground_truth, prediction), rounded)
    Precision_at_ks = precision_at_ks(ground_truth, prediction)
    Log_loss = round(log_loss(ground_truth, prediction), rounded)
    Average_precision_score = round(
        average_precision_score(ground_truth, prediction), rounded)

    prediction = np.round(prediction)

    F1_Micro = round(f1_score(ground_truth, prediction, average='micro'),
                     rounded)
    Hamming_loss = round(hamming_loss(ground_truth, prediction), rounded)
    Accuracy = round(accuracy_score(ground_truth, prediction), rounded)
    Recall_score_macro = round(
        recall_score(ground_truth, prediction, average='macro'), rounded)
    Recall_score_micro = round(
        recall_score(ground_truth, prediction, average='micro'), rounded)
    Precision_score_macro = round(
        precision_score(ground_truth, prediction, average='macro'), rounded)
    Precision_score_micro = round(
        precision_score(ground_truth, prediction, average='micro'), rounded)
    Jaccard_score_macro = round(
        jaccard_score(ground_truth, prediction, average='macro'), rounded)
    Jaccard_score_micro = round(
        jaccard_score(ground_truth, prediction, average='micro'), rounded)

    print('Recall_score_macro:   ', Recall_score_macro)
    print('Recall_score_micro:   ', Recall_score_micro)
    print('Precision_score_macro:   ', Precision_score_macro)
    print('Precision_score_micro:   ', Precision_score_micro)
    print('Jaccard_score_macro:   ', Jaccard_score_macro)
    print('Jaccard_score_micro:   ', Jaccard_score_micro)
    print("Accuracy = ", Accuracy)
    print('precision_at_ks: ', Precision_at_ks)
    print('Hamming_loss: ', Hamming_loss)
    print('Log_loss:  ', Log_loss)
    print('Average_precision_score: ', Average_precision_score)
    print('F1_Micro ', F1_Micro)
    print('One_error: ', One_error)
    print('Ranking loss: ', rankloss)
    print('coverage: ', Coverage_error)
    print('AUC-micro:   ', AUC_micro)
    print('AUC-macro:   ', AUC_macro)

    print('\n')
Example #25
def evaluate(_y_true, _y_pred, _y_scores):
    y_true = np.array(_y_true)
    y_pred = np.array(_y_pred)
    y_scores = np.array(_y_scores)

    pre = precision_score(y_true, y_pred, average='micro')
    rec = recall_score(y_true, y_pred, average='micro')
    fs = f1_score(y_true, y_pred, average='micro')
    hl = hamming_loss(y_true, y_pred)
    rl = label_ranking_loss(y_true, y_scores)
    return pre, rec, fs, hl, rl
Example #26
 def get_classification_report_2(self, train_y, predicted_score, verbose=1):
     cov_err = metrics.coverage_error(train_y, predicted_score)
     label_rank_avg_prec = metrics.label_ranking_average_precision_score(train_y, predicted_score)
     rank_loss = metrics.label_ranking_loss(train_y, predicted_score)
     log_loss = metrics.log_loss(train_y, predicted_score)
     if verbose:
         print('CoverageError', cov_err)
         print('LabelRankingAvgPrec', label_rank_avg_prec)
         print('LabelRankingLoss', rank_loss)
         print('log_loss', log_loss)
     return [cov_err, label_rank_avg_prec, rank_loss, log_loss]
Example #27
def treino_binarizacao(X, Y):
    labels = [
        'Latitude', 'Longitude', 'DiaSemChuva', 'Precipitacao', 'RiscoFogo',
        'TempBulboSecoEst1', 'TempBulboUmidoEst1', 'UmidadeRelativaEst1',
        'DirecaoVentoEst1', 'VelocidadeVentoNebulosidadeEst1',
        'DistanciaParaEst1', 'TempBulboSecoEst2', 'TempBulboUmidoEst2',
        'UmidadeRelativaEst2', 'DirecaoVentoEst2',
        'VelocidadeVentoNebulosidadeEst2', 'DistanciaParaEst2'
    ]
    mlb = MultiLabelBinarizer()
    Ybin = mlb.fit_transform(Y)
    mlp = neuralnetwork.MLPClassifier(hidden_layer_sizes=(10, 4),
                                      activation='tanh',
                                      solver='lbfgs',
                                      learning_rate='invscaling',
                                      random_state=2818,
                                      max_iter=400,
                                      early_stopping=True)
    # split the binarized targets so the ranking metrics below get an indicator matrix
    x_train, x_test, y_train, y_test = model.train_test_split(X,
                                                              Ybin,
                                                              train_size=0.33)
    mlp.fit(x_train, y_train)
    y_pred = mlp.predict(x_test)
    print("Erro de cobertura:" + str(metrics.coverage_error(y_test, y_pred)))
    print("Precisão média de labels:" +
          str(metrics.label_ranking_average_precision_score(y_test, y_pred)))
    print("Perda de ranks:" + str(metrics.label_ranking_loss(y_test, y_pred)))
    matriz = matriz_confusao(y_test, y_pred)
    results = {
        "Erro de cobertura":
        metrics.coverage_error(y_test, y_pred),
        "Precisão média de labels":
        metrics.label_ranking_average_precision_score(y_test, y_pred),
        "Perda de ranks":
        metrics.label_ranking_loss(y_test, y_pred),
        "Matrizes":
        matriz
    }
    res_df = pd.DataFrame(results)
    res_df.to_csv(
        r"C:\Users\Livnick\Documents\dadosFocos\ResultadosMAcomMatriz2.csv")
Example #28
def CVPR18_metrics(original_dataset_cartesian):
    M_WINDOW_TRAINED_MODEL = 5
    H_WINDOW_TRAINED_MODEL = 25
    traces_train, traces_test = get_traces_for_train_and_test()
    model = create_CVPR18_model(M_WINDOW_TRAINED_MODEL, H_WINDOW_TRAINED_MODEL,
                                NUM_TILES_HEIGHT_SAL, NUM_TILES_WIDTH_SAL)
    model.load_weights(
        os.path.join(
            ROOT_FOLDER, 'CVPR18',
            'Models_EncDec_3DCoords_ContSal_init_5_in_5_out_25_end_25',
            'weights_100.hdf5'))
    accuracy_results = []
    f1_score_results = []
    ranking_results = []
    for trace_num, trace in enumerate(traces_test):
        print('computing CVPR18 metrics for trace', trace_num, '/',
              len(traces_test))
        user = trace['user']
        video = trace['video']
        repl_tiles_map = read_replica_tile_info(video, user)
        saliency_in_video = load_saliency(SALIENCY_FOLDER, video)
        for t in range(M_WINDOW,
                       len(original_dataset_cartesian[user][video]) -
                       H_WINDOW):
            past_positions = original_dataset_cartesian[user][video][
                t - M_WINDOW:t + 1]
            # ToDo: the value "6" is hardcoded; it comes from "int(MODEL_SAMPLING_RATE / ORIGINAL_SAMPLING_RATE)"
            curr_id_in_model_steps = int(t / 6)
            sal_decoder = np.zeros(
                (1, H_WINDOW_TRAINED_MODEL, NUM_TILES_HEIGHT_SAL,
                 NUM_TILES_WIDTH_SAL, 1))
            picked_sal_decoder = saliency_in_video[curr_id_in_model_steps +
                                                   1:curr_id_in_model_steps +
                                                   H_WINDOW_TRAINED_MODEL + 1]
            sal_decoder[0, :len(picked_sal_decoder), :, :,
                        0] = picked_sal_decoder
            pred_tile_map = get_CVPR18_prediction(model, past_positions,
                                                  M_WINDOW_TRAINED_MODEL,
                                                  sal_decoder)
            # future_positions = original_dataset_cartesian[user][video][t+1:t+H_WINDOW+1]
            future_tile_maps = repl_tiles_map[t + 1:t + H_WINDOW + 1]
            for x_i, tile_map in enumerate(future_tile_maps):
                accuracy_results.append(
                    accuracy_score(np.ndarray.flatten(tile_map),
                                   np.ndarray.flatten(pred_tile_map[x_i])))
                f1_score_results.append(
                    f1_score(np.ndarray.flatten(tile_map),
                             np.ndarray.flatten(pred_tile_map[x_i])))
                ranking_results.append(
                    label_ranking_loss(tile_map, pred_tile_map[x_i]))
    print('CVPR18:\tAccuracy',
          np.mean(accuracy_results) * 100, 'F-Score',
          np.mean(f1_score_results), 'Rank. Loss', np.mean(ranking_results))
Example #29
def Ranking_loss(labels, probs, mode=1):
    '''
    Examines whether a sample's irrelevant labels are ranked below its relevant labels
    @labels: true labels of the samples
    @probs:  predicted label probabilities of the samples
    '''
    if mode:
        rl = label_ranking_loss(labels, probs)
    else:
        rl = np.mean(list(map(_ranking_loss, probs, labels)))

    return rl
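
Illustrative call of the sklearn path (mode=1); `_ranking_loss`, the custom fallback, is defined elsewhere in that project and is not exercised here:

labels = [[1, 0, 1], [0, 1, 0]]
probs = [[0.8, 0.3, 0.6], [0.2, 0.9, 0.4]]
print(Ranking_loss(labels, probs))   # 0.0: every relevant label outranks every irrelevant one
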
Example #30
def writeall(y_true,y_score,filename): 
    fp = open(str(filename) + ".txt","w")
    fp.write("Classification Report:\n")   
    fp.write(mlc_classification_report(y_true,y_score))
    fp.write("\nHamming loss (lower is better [0,1]): " + str(hamming_loss(y_true,y_score)))
    fp.write("\nAccuracy score (higher is better [0,1]): " + str(mlc_accuracy_score(y_true,y_score)))
    fp.write("\nJaccard similarity score (higher is better [0,1]): " + str(mlc_jaccard_similarity_score(y_true,y_score)))
    fp.write("\nF1 score: " + str(mlc_f1score(y_true,y_score)))
    fp.write("\nSubset accuracy (higher is better [0,1]): " + str(mlc_subset_accuracy(y_true,y_score)))
    fp.write("\nAverage precision score (higher is better [0,1]): " + str(average_precision_score(y_true,y_score)))
    fp.write("\nRanking Loss: (lower is better [0,1]) " + str(label_ranking_loss(y_true,y_score)))
    fp.write("\nAverage Micro Precision: " +str(precision_score(y_true,y_score,average='micro')))
    fp.write("\nAverage Micro Recall: "+str(recall_score(y_true,y_score,average='micro')))
    fp.close()
Example #31
def get_avg_results(hat_y, y):
    values = {}
    values['avg_precision_micro'] = average_precision_score(y,
                                                            hat_y,
                                                            average='micro')
    #    values['avg_precision_macro'] = average_precision_score(y, hat_y, average = 'macro')
    values['roc_auc_score_micro'] = roc_auc_score(y, hat_y, average='micro')
    #    values['roc_auc_score_macro'] = roc_auc_score(y, hat_y, average = 'macro')
    values['coverage_error'] = coverage_error(y, hat_y)
    values[
        'label_ranking_average_precision_score'] = label_ranking_average_precision_score(
            y, hat_y)
    values['label_ranking_loss'] = label_ranking_loss(y, hat_y)
    return values
Example #32
            def eval_fold(train_idxs, test_idxs):
                measures = [0, 0, 0, 0, 0, 0]
                fold_classifier = clone(classifier)
                X_train, X_test = split_training_test(X, train_idxs, test_idxs)
                Y_train, Y_test = Y[train_idxs], Y[test_idxs]

                fold_classifier.fit(X_train, Y_train)
                Y_pred = fold_classifier.predict(X_test)
                measures[0] = f1_score(Y_test, Y_pred, average='macro')
                measures[1] = f1_score(Y_test, Y_pred, average='micro')
                measures[2] = accuracy_score(Y_test, Y_pred)
                measures[3] = label_ranking_loss(Y_test, Y_pred)
                measures[4] = hamming_loss(Y_test, Y_pred)
                measures[5] = zero_one_loss(Y_test, Y_pred)
                return np.array(measures)
Example #33
def multi_label_evaluate(y, y_prob, threshold):
    statistics = Statistics()
    y_pred = (y_prob >= threshold).astype(int)
    y_pred_50 = (y_prob >= 0.5).astype(int)

    ranking_loss = label_ranking_loss(y, y_pred)
    lraps = label_ranking_average_precision_score(y, y_pred)
    ranking_loss_50 = label_ranking_loss(y, y_pred_50)
    lraps_50 = label_ranking_average_precision_score(y, y_pred_50)

    f1_macro = f1_score(y, y_pred, average='macro')
    f1_macro_50 = f1_score(y, y_pred_50, average='macro')

    statistics.update_statistics("Multi-Label", "Ranking Loss", ranking_loss)
    statistics.update_statistics("Multi-Label", "Ranking Precision", lraps)
    statistics.update_statistics("Multi-Label", "Ranking Loss (t=0.5)", ranking_loss_50)
    statistics.update_statistics("Multi-Label", "Ranking Precision (t=0.5)", lraps_50)

    statistics.update_statistics("Multi-Label", "Macro F1", f1_macro)
    statistics.update_statistics("Multi-Label", "Macro F1 (t=0.5)", f1_macro_50)

    try:
        auc_macro = roc_auc_score(y, y_pred, average='macro')
        auc_macro_50 = roc_auc_score(y, y_pred_50, average='macro')
        auc_pr_macro = roc_auc_score(y, y_prob, average='macro')

        statistics.update_statistics("Multi-Label", "Macro AUC", auc_macro)
        statistics.update_statistics("Multi-Label", "Macro AUC (t=0.5)", auc_macro_50)
        statistics.update_statistics("Multi-Label", "Macro AUC (Pr)", auc_pr_macro)

    except ValueError:
        statistics.update_statistics("Multi-Label", "Macro AUC", np.NaN)
        statistics.update_statistics("Multi-Label", "Macro AUC (t=0.5)", np.NaN)
        statistics.update_statistics("Multi-Label", "Macro AUC (Pr)", np.NaN)

    return statistics
Example #34
def evaluate(predictions, labels, threshold=0.4, multi_label=True):
    '''
        True Positive  :  Label : 1, Prediction : 1
        False Positive :  Label : 0, Prediction : 1
        False Negative :  Label : 1, Prediction : 0
        True Negative  :  Label : 0, Prediction : 0
        Precision      :  TP/(TP + FP)
        Recall         :  TP/(TP + FN)
        F Score        :  2.P.R/(P + R)
        Ranking Loss   :  The average number of label pairs that are incorrectly ordered, given the prediction scores
        Hamming Loss   :  The fraction of labels that are incorrectly predicted (Hamming distance between predictions and labels)
    '''
    assert predictions.shape == labels.shape, "Shapes: %s, %s" % (predictions.shape, labels.shape,)
    metrics = dict()
    if not multi_label:
        metrics['bae'] = BAE(labels, predictions)
        labels, predictions = np.argmax(labels, axis=1), np.argmax(predictions, axis=1)

        metrics['accuracy'] = accuracy_score(labels, predictions)
        metrics['micro_precision'], metrics['micro_recall'], metrics['micro_f1'], _ = \
            precision_recall_fscore_support(labels, predictions, average='micro')
        metrics['macro_precision'], metrics['macro_recall'], metrics['macro_f1'], metrics['coverage'], \
            metrics['average_precision'], metrics['ranking_loss'], metrics['pak'], metrics['hamming_loss'] \
            = 0, 0, 0, 0, 0, 0, 0, 0

    else:
        metrics['coverage'] = coverage_error(labels, predictions)
        metrics['average_precision'] = label_ranking_average_precision_score(labels, predictions)
        metrics['ranking_loss'] = label_ranking_loss(labels, predictions)
        
        for i in range(predictions.shape[0]):
            predictions[i, :][predictions[i, :] >= threshold] = 1
            predictions[i, :][predictions[i, :] < threshold] = 0

        metrics['bae'] = 0
        metrics['patk'] = patk(predictions, labels)
        metrics['micro_precision'], metrics['micro_recall'], metrics['micro_f1'], metrics['macro_precision'], \
            metrics['macro_recall'], metrics['macro_f1'] = bipartition_scores(labels, predictions)
    return metrics
Example #35
 def ranking_loss(self):
     self.ranking_loss = metrics.label_ranking_loss(self.ground_truth, self.predictions_raw)
     return 'Ranking Loss: ' + str(self.ranking_loss)
Example #36
def multilabel_metrics(pred_list, verbose, extra_vars, split):
    """
    Multi-label classification metrics. See the multilabel ranking metrics in the sklearn library for more info:
        http://scikit-learn.org/stable/modules/model_evaluation.html#multilabel-ranking-metrics

    :param pred_list: dictionary of hypothesis sentences
    :param verbose: if greater than 0 the metric measures are printed out
    :param extra_vars: extra variables
                        extra_vars['word2idx'] - dictionary mapping from words to indices
                        extra_vars['references'] - list of GT labels
    :param split: split on which we are evaluating
    :return: Dictionary of multilabel metrics
    """
    from sklearn import metrics as sklearn_metrics

    word2idx = extra_vars[split]['word2idx']

    # check if an additional dictionary matching raw to basic and general labels is provided
    # in that case a more general evaluation will be considered
    raw2basic = extra_vars[split].get('raw2basic', None)
    if raw2basic is not None:
        logging.info('Applying general evaluation with raw2basic dictionary.')

    if raw2basic is None:
        n_classes = len(word2idx)
    else:
        basic_values = set(raw2basic.values())
        n_classes = len(basic_values)
    n_samples = len(pred_list)

    # Create prediction matrix
    y_pred = np.zeros((n_samples, n_classes))
    for i_s, sample in list(enumerate(pred_list)):
        for word in sample:
            if raw2basic is None:
                y_pred[i_s, word2idx[word]] = 1
            else:
                word = word.strip()
                y_pred[i_s, raw2basic[word]] = 1

    # Prepare GT
    gt_list = extra_vars[split]['references']

    if raw2basic is None:
        y_gt = np.array(gt_list)
    else:
        idx2word = {v: k for k, v in iteritems(word2idx)}
        y_gt = np.zeros((n_samples, n_classes))
        for i_s, sample in list(enumerate(gt_list)):
            for raw_idx, is_active in list(enumerate(sample)):
                if is_active:
                    word = idx2word[raw_idx].strip()
                    y_gt[i_s, raw2basic[word]] = 1

    # Compute Coverage Error
    coverr = sklearn_metrics.coverage_error(y_gt, y_pred)
    # Compute Label Ranking AvgPrec
    avgprec = sklearn_metrics.label_ranking_average_precision_score(y_gt, y_pred)
    # Compute Label Ranking Loss
    rankloss = sklearn_metrics.label_ranking_loss(y_gt, y_pred)
    # Compute Precision, Recall and F1 score
    precision, recall, f1, _ = sklearn_metrics.precision_recall_fscore_support(y_gt, y_pred, average='micro')

    if verbose > 0:
        logging.info(
            '"coverage_error" (best: avg labels per sample = %f): %f' % (float(np.sum(y_gt)) / float(n_samples), coverr))
        logging.info('Label Ranking "average_precision" (best: 1.0): %f' % avgprec)
        logging.info('Label "ranking_loss" (best: 0.0): %f' % rankloss)
        logging.info('precision: %f' % precision)
        logging.info('recall: %f' % recall)
        logging.info('f1: %f' % f1)

    return {'coverage_error': coverr,
            'average_precision': avgprec,
            'ranking_loss': rankloss,
            'precision': precision,
            'recall': recall,
            'f1': f1}
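
The indicator matrices that the function builds by hand can also be produced with MultiLabelBinarizer, which may be easier to adapt; a short sketch with made-up label lists (the names are purely illustrative):

from sklearn.preprocessing import MultiLabelBinarizer
from sklearn import metrics as sklearn_metrics

pred_list = [['cat', 'dog'], ['dog'], ['bird', 'cat']]   # predicted labels per sample
gt_list = [['cat'], ['dog', 'bird'], ['cat']]            # reference labels per sample

mlb = MultiLabelBinarizer()
y_gt = mlb.fit_transform(gt_list)     # fixes the label-to-column mapping
y_pred = mlb.transform(pred_list)     # same columns for the predictions

print(sklearn_metrics.coverage_error(y_gt, y_pred))
print(sklearn_metrics.label_ranking_average_precision_score(y_gt, y_pred))
print(sklearn_metrics.label_ranking_loss(y_gt, y_pred))
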
Example #37
def test_label_ranking_loss():
    assert_almost_equal(label_ranking_loss([[0, 1]], [[0.25, 0.75]]), 0)
    assert_almost_equal(label_ranking_loss([[0, 1]], [[0.75, 0.25]]), 1)

    assert_almost_equal(label_ranking_loss([[0, 0, 1]], [[0.25, 0.5, 0.75]]),
                        0)
    assert_almost_equal(label_ranking_loss([[0, 1, 0]], [[0.25, 0.5, 0.75]]),
                        1 / 2)
    assert_almost_equal(label_ranking_loss([[0, 1, 1]], [[0.25, 0.5, 0.75]]),
                        0)
    assert_almost_equal(label_ranking_loss([[1, 0, 0]], [[0.25, 0.5, 0.75]]),
                        2 / 2)
    assert_almost_equal(label_ranking_loss([[1, 0, 1]], [[0.25, 0.5, 0.75]]),
                        1 / 2)
    assert_almost_equal(label_ranking_loss([[1, 1, 0]], [[0.25, 0.5, 0.75]]),
                        2 / 2)

    # Undefined metrics -  the ranking doesn't matter
    assert_almost_equal(label_ranking_loss([[0, 0]], [[0.75, 0.25]]), 0)
    assert_almost_equal(label_ranking_loss([[1, 1]], [[0.75, 0.25]]), 0)
    assert_almost_equal(label_ranking_loss([[0, 0]], [[0.5, 0.5]]), 0)
    assert_almost_equal(label_ranking_loss([[1, 1]], [[0.5, 0.5]]), 0)

    assert_almost_equal(label_ranking_loss([[0, 0, 0]], [[0.5, 0.75, 0.25]]),
                        0)
    assert_almost_equal(label_ranking_loss([[1, 1, 1]], [[0.5, 0.75, 0.25]]),
                        0)
    assert_almost_equal(label_ranking_loss([[0, 0, 0]], [[0.25, 0.5, 0.5]]),
                        0)
    assert_almost_equal(label_ranking_loss([[1, 1, 1]], [[0.25, 0.5, 0.5]]), 0)

    # Non-trivial case
    assert_almost_equal(label_ranking_loss([[0, 1, 0], [1, 1, 0]],
                                           [[0.1, 10., -3], [0, 1, 3]]),
                        (0 + 2 / 2) / 2.)

    assert_almost_equal(label_ranking_loss(
        [[0, 1, 0], [1, 1, 0], [0, 1, 1]],
        [[0.1, 10, -3], [0, 1, 3], [0, 2, 0]]),
        (0 + 2 / 2 + 1 / 2) / 3.)

    assert_almost_equal(label_ranking_loss(
        [[0, 1, 0], [1, 1, 0], [0, 1, 1]],
        [[0.1, 10, -3], [3, 1, 3], [0, 2, 0]]),
        (0 + 2 / 2 + 1 / 2) / 3.)

    # Sparse csr matrices
    assert_almost_equal(label_ranking_loss(
        csr_matrix(np.array([[0, 1, 0], [1, 1, 0]])),
        [[0.1, 10, -3], [3, 1, 3]]),
        (0 + 2 / 2) / 2.)
Example #38
nlraprecision = []
plrloss = []
plraprecision = []

nscoreone = 0
nscoretwo = 0
correctness = 0
for i in range(0,10):
	p = classifier2.predictor()
	p.learnPredictor()
	n_predicted = p.predict()
	correct = p.mlb.transform(util2.getCorrectGenres(p.testExamples))

	ny_score = np.array(n_predicted)
	y_true = np.array(correct)
	nscoreone += label_ranking_loss(y_true, ny_score)
	nscoretwo += label_ranking_average_precision_score(y_true, ny_score)
	correctness += util2.printCorrectness(p.mlb, p.testExamples, n_predicted, correct)

print "LABEL RANKING LOSS: " + str(float(nscoreone)/10)
print "LABEL RANKING AVERAGE PRECISION: " + str(float(nscoretwo)/10)
print "CORRECTNESS: " + str(float(correctness)/10)
# util2.printAccuracyByGenre(p.mlb, p.testExamples, n_predicted, correct)
# util2.printOutput(p.mlb, p.testExamples, n_predicted, correct)




# print "=========="
# print "PERCENT RESULTS"
# print "LABEL RANKING LOSS:"